panfrost: Counter definitions
author Antonio Caggiano <antonio.caggiano@collabora.com>
Thu, 15 Apr 2021 08:08:38 +0000 (10:08 +0200)
committer Marge Bot <eric+marge@anholt.net>
Thu, 27 May 2021 13:24:54 +0000 (13:24 +0000)
Add Mali events XML files generated with Panfrost HWC helper.
https://gitlab.freedesktop.org/fahien/panfrost-hwc-helper/

v2: Restore license headers.
v3: Fix shader core and memory system counter offsets.

Signed-off-by: Antonio Caggiano <antonio.caggiano@collabora.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10844>

16 files changed:
src/panfrost/perf/G31.xml [new file with mode: 0644]
src/panfrost/perf/G51.xml [new file with mode: 0644]
src/panfrost/perf/G52.xml [new file with mode: 0644]
src/panfrost/perf/G57.xml [new file with mode: 0644]
src/panfrost/perf/G68.xml [new file with mode: 0644]
src/panfrost/perf/G71.xml [new file with mode: 0644]
src/panfrost/perf/G72.xml [new file with mode: 0644]
src/panfrost/perf/G76.xml [new file with mode: 0644]
src/panfrost/perf/G77.xml [new file with mode: 0644]
src/panfrost/perf/G78.xml [new file with mode: 0644]
src/panfrost/perf/T72x.xml [new file with mode: 0644]
src/panfrost/perf/T76x.xml [new file with mode: 0644]
src/panfrost/perf/T82x.xml [new file with mode: 0644]
src/panfrost/perf/T83x.xml [new file with mode: 0644]
src/panfrost/perf/T86x.xml [new file with mode: 0644]
src/panfrost/perf/T88x.xml [new file with mode: 0644]

diff --git a/src/panfrost/perf/G31.xml b/src/panfrost/perf/G31.xml
new file mode 100644 (file)
index 0000000..4fef366
--- /dev/null
@@ -0,0 +1,177 @@
+<!--
+Copyright © 2017-2020 ARM Limited.
+Copyright © 2021 Collabora, Ltd.
+Author: Antonio Caggiano <antonio.caggiano@collabora.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+<metrics id="TDVx">
+    <category name="Job Manager" per_cpu="no">
+        <event offset="6" counter="GPU_ACTIVE" title="GPU Cycles" name="GPU active" description="The number of cycles where the GPU has a workload of any type queued for processing." units="cycles" />
+        <event offset="7" advanced="yes" counter="IRQ_ACTIVE" title="GPU Cycles" name="Interrupt active" description="The number of cycles where the GPU has a pending interrupt." units="cycles" />
+        <event offset="8" advanced="yes" counter="JS0_JOBS" title="GPU Jobs" name="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs" />
+        <event offset="9" counter="JS0_TASKS" title="GPU Tasks" name="Fragment tasks" description="The number of 32x32 pixel tasks processed by the GPU fragment queue." units="tasks" />
+        <event offset="10" counter="JS0_ACTIVE" title="GPU Cycles" name="Fragment queue active" description="The number of cycles where work is queued for processing in the GPU fragment queue." units="cycles" />
+        <event offset="12" advanced="yes" counter="JS0_WAIT_READ" title="GPU Wait Cycles" name="Fragment descriptor read cycles" description="The number of cycles where queued fragment work is waiting for a descriptor load." units="cycles" />
+        <event offset="13" advanced="yes" counter="JS0_WAIT_ISSUE" title="GPU Wait Cycles" name="Fragment job issue cycles" description="The number of cycles where queued fragment work is waiting for an available processor." units="cycles" />
+        <event offset="14" advanced="yes" counter="JS0_WAIT_DEPEND" title="GPU Wait Cycles" name="Fragment job dependency cycles" description="The number of cycles where queued fragment work is waiting for dependent work to complete." units="cycles" />
+        <event offset="15" advanced="yes" counter="JS0_WAIT_FINISH" title="GPU Wait Cycles" name="Fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued fragment work to complete." units="cycles" />
+        <event offset="16" advanced="yes" counter="JS1_JOBS" title="GPU Jobs" name="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs" />
+        <event offset="17" advanced="yes" counter="JS1_TASKS" title="GPU Tasks" name="Non-fragment tasks" description="The number of tasks processed by the GPU non-fragment queue." units="tasks" />
+        <event offset="18" counter="JS1_ACTIVE" title="GPU Cycles" name="Non-fragment queue active" description="The number of cycles where work is queued in the GPU non-fragment queue." units="cycles" />
+        <event offset="20" advanced="yes" counter="JS1_WAIT_READ" title="GPU Wait Cycles" name="Non-fragment descriptor read cycles" description="The number of cycles where queued non-fragment work is waiting for a descriptor load." units="cycles" />
+        <event offset="21" advanced="yes" counter="JS1_WAIT_ISSUE" title="GPU Wait Cycles" name="Non-fragment job issue cycles" description="The number of cycles where queued non-fragment work is waiting for an available processor." units="cycles" />
+        <event offset="22" advanced="yes" counter="JS1_WAIT_DEPEND" title="GPU Wait Cycles" name="Non-fragment job dependency cycles" description="The number of cycles where queued non-fragment work is waiting for dependent work to complete." units="cycles" />
+        <event offset="23" advanced="yes" counter="JS1_WAIT_FINISH" title="GPU Wait Cycles" name="Non-fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued non-fragment work to complete." units="cycles" />
+        <event offset="24" advanced="yes" counter="JS2_JOBS" title="GPU Jobs" name="Reserved jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs" />
+        <event offset="25" advanced="yes" counter="JS2_TASKS" title="GPU Tasks" name="Reserved tasks" description="The number of tasks processed by the GPU reserved queue." units="tasks" />
+        <event offset="26" advanced="yes" counter="JS2_ACTIVE" title="GPU Cycles" name="Reserved queue active" description="The number of cycles where work is queued in the GPU reserved queue." units="cycles" />
+        <event offset="28" advanced="yes" counter="JS2_WAIT_READ" title="GPU Wait Cycles" name="Reserved descriptor read cycles" description="The number of cycles where queued reserved work is waiting for a descriptor load." units="cycles" />
+        <event offset="29" advanced="yes" counter="JS2_WAIT_ISSUE" title="GPU Wait Cycles" name="Reserved job issue cycles" description="The number of cycles where queued reserved work is waiting for an available processor." units="cycles" />
+        <event offset="30" advanced="yes" counter="JS2_WAIT_DEPEND" title="GPU Wait Cycles" name="Reserved job dependency cycles" description="The number of cycles where queued reserved work is waiting for dependent work to complete." units="cycles" />
+        <event offset="31" advanced="yes" counter="JS2_WAIT_FINISH" title="GPU Wait Cycles" name="Reserved job finish cycles" description="The number of cycles where the GPU is waiting for issued reserved work to complete." units="cycles" />
+    </category>
+    <category name="Tiler" per_cpu="no">
+        <event offset="68" counter="TILER_ACTIVE" title="GPU Cycles" name="Tiler active" description="The number of cycles where the tiler has a workload queued for processing." units="cycles" />
+        <event offset="70" counter="TRIANGLES" title="Input Primitives" name="Triangle primitives" description="The number of input triangle primitives." units="primitives" />
+        <event offset="71" counter="LINES" title="Input Primitives" name="Line primitives" description="The number of input line primitives." units="primitives" />
+        <event offset="72" counter="POINTS" title="Input Primitives" name="Point primitives" description="The number of input point primitives." units="primitives" />
+        <event offset="73" counter="FRONT_FACING" title="Visible Primitives" name="Front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" />
+        <event offset="74" counter="BACK_FACING" title="Visible Primitives" name="Back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" />
+        <event offset="75" counter="PRIM_VISIBLE" title="Primitive Culling" name="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" />
+        <event offset="76" counter="PRIM_CULLED" title="Primitive Culling" name="Facing and XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives" />
+        <event offset="77" counter="PRIM_CLIPPED" title="Primitive Culling" name="Z plane test culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives" />
+        <event offset="78" counter="PRIM_SAT_CULLED" title="Primitive Culling" name="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" />
+        <event offset="81" advanced="yes" counter="BUS_READ" title="Tiler L2 Accesses" name="Read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" />
+        <event offset="83" advanced="yes" counter="BUS_WRITE" title="Tiler L2 Accesses" name="Write beats" description="The number of internal bus data write cycles made by the tiler." units="beats" />
+        <event offset="85" counter="IDVS_POS_SHAD_REQ" title="Tiler Shading Requests" name="Position shading requests" description="The number of position shading requests in the IDVS flow." units="requests" />
+        <event offset="87" advanced="yes" counter="IDVS_POS_SHAD_STALL" title="Tiler Cycles" name="Position shading stall cycles" description="The number of cycles where the tiler has a stalled position shading request." units="cycles" />
+        <event offset="88" advanced="yes" counter="IDVS_POS_FIFO_FULL" title="Tiler Cycles" name="Position FIFO full cycles" description="The number of cycles where the tiler has a stalled position shading buffer." units="cycles" />
+        <event offset="90" advanced="yes" counter="VCACHE_HIT" title="Tiler Vertex Cache" name="Position cache hits" description="The number of position lookups that result in a hit in the vertex cache." units="requests" />
+        <event offset="91" advanced="yes" counter="VCACHE_MISS" title="Tiler Vertex Cache" name="Position cache misses" description="The number of position lookups that miss in the vertex cache." units="requests" />
+        <event offset="95" advanced="yes" counter="VFETCH_STALL" title="Tiler Cycles" name="Primitive assembly busy stall cycles" description="The number of cycles where the tiler is stalled waiting for primitive assembly." units="cycles" />
+        <event offset="98" advanced="yes" counter="IDVS_VBU_HIT" title="Tiler Vertex Cache" name="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" />
+        <event offset="99" advanced="yes" counter="IDVS_VBU_MISS" title="Tiler Vertex Cache" name="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests" />
+        <event offset="101" counter="IDVS_VAR_SHAD_REQ" title="Tiler Shading Requests" name="Varying shading requests" description="The number of varying shading requests in the IDVS flow." units="requests" />
+        <event offset="102" advanced="yes" counter="IDVS_VAR_SHAD_STALL" title="Tiler Cycles" name="Varying shading stall cycles" description="The number of cycles where the tiler has a stalled varying shading request." units="cycles" />
+        <event offset="118" advanced="yes" counter="WRBUF_NO_AXI_ID_STALL" title="Tiler Cycles" name="Write buffer transaction stall cycles" description="The number of cycles where the tiler write buffer can not send data because it has no available write IDs." units="cycles" />
+        <event offset="119" advanced="yes" counter="WRBUF_AXI_STALL" title="Tiler Cycles" name="Write buffer write stall cycles" description="The number of cycles where the tiler write buffer can not send data because the bus is not ready." units="cycles" />
+    </category>
+    <category name="Shader Core" per_cpu="no">
+        <event offset="196" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment active" description="The number of cycles where the shader core is processing a fragment workload." units="cycles" />
+        <event offset="197" advanced="yes" counter="FRAG_PRIMITIVES" title="Core Primitives" name="Read primitives" description="The number of primitives read from the tile list by the fragment front-end." units="primitives" />
+        <event offset="198" counter="FRAG_PRIM_RAST" title="Core Primitives" name="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" />
+        <event offset="199" counter="FRAG_FPK_ACTIVE" title="Core Cycles" name="Fragment FPKB active" description="The number of cycles where at least one quad is present in the pre-pipe quad queue." units="cycles" />
+        <event offset="201" counter="FRAG_WARPS" title="Core Warps" name="Fragment warps" description="The number of fragment warps created." units="warps" />
+        <event offset="202" counter="FRAG_PARTIAL_WARPS" title="Core Warps" name="Partial fragment warps" description="The number of fragment warps containing helper threads that do not correspond to a hit sample point." units="warps" />
+        <event offset="203" counter="FRAG_QUADS_RAST" title="Core Quads" name="Rasterized quads" description="The number of quads generated by the rasterization phase." units="quads" />
+        <event offset="204" counter="FRAG_QUADS_EZS_TEST" title="Core Quads" name="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" />
+        <event offset="205" counter="FRAG_QUADS_EZS_UPDATE" title="Core Quads" name="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" />
+        <event offset="206" counter="FRAG_QUADS_EZS_KILL" title="Core Quads" name="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" />
+        <event offset="207" counter="FRAG_LZS_TEST" title="Core Quads" name="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" />
+        <event offset="208" counter="FRAG_LZS_KILL" title="Core Quads" name="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" />
+        <event offset="210" counter="FRAG_PTILES" title="Core Tiles" name="Tiles" description="The number of tiles processed by the shader core." units="tiles" />
+        <event offset="211" counter="FRAG_TRANS_ELIM" title="Core Tiles" name="Constant tiles killed" description="The number of tiles killed by transaction elimination." units="tiles" />
+        <event offset="212" counter="QUAD_FPK_KILLER" title="Core Quads" name="FPK occluder quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" />
+        <event offset="214" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Non-fragment active" description="The number of cycles where the shader core is processing some non-fragment workload." units="cycles" />
+        <event offset="215" advanced="yes" counter="COMPUTE_TASKS" title="Core Tasks" name="Non-fragment tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks" />
+        <event offset="216" counter="COMPUTE_WARPS" title="Core Warps" name="Non-fragment warps" description="The number of non-fragment warps created." units="warps" />
+        <event offset="217" advanced="yes" counter="COMPUTE_STARVING" title="Core Starvation Cycles" name="Non-fragment starvation cycles" description="The number of cycles where the shader core is processing a non-fragment workload and there are no new threads available for execution." units="cycles" />
+        <event offset="218" counter="EXEC_CORE_ACTIVE" title="Core Cycles" name="Execution core active" description="The number of cycles where the shader core is processing at least one warp." units="cycles" />
+        <event offset="219" advanced="yes" counter="EXEC_ACTIVE" title="Core Cycles" name="Execution engine active" description="The number of cycles where the execution engine unit is processing at least one thread." units="cycles" />
+        <event offset="220" counter="EXEC_INSTR_COUNT" title="Core EE Instructions" name="Executed instructions" description="The number of instructions executed per warp." units="instructions" />
+        <event offset="221" counter="EXEC_INSTR_DIVERGED" title="Core EE Instructions" name="Diverged instructions" description="The number of instructions executed per warp, that have control flow divergence." units="instructions" />
+        <event offset="222" advanced="yes" counter="EXEC_INSTR_STARVING" title="Core Starvation Cycles" name="Execution engine starvation cycles" description="The number of cycles where no new threads are available for execution." units="cycles" />
+        <event offset="223" advanced="yes" counter="ARITH_INSTR_SINGLE_FMA" title="Core EE Instructions" name="Arithmetic instructions" description="The number of instructions where the workload is a single FMA pipe arithmetic operation." units="instructions" />
+        <event offset="224" advanced="yes" counter="ARITH_INSTR_DOUBLE" title="Core EE Instructions" name="Dual Arithmetic instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe arithmetic operation." units="instructions" />
+        <event offset="225" advanced="yes" counter="ARITH_INSTR_MSG" title="Core EE Instructions" name="Arithmetic + Message instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe message operation." units="instructions" />
+        <event offset="226" advanced="yes" counter="ARITH_INSTR_MSG_ONLY" title="Core EE Instructions" name="Message instructions" description="The number of instructions where the workload is a single ADD pipe message operation, with no FMA pipe operation." units="instructions" />
+        <event offset="227" counter="TEX_MSGI_NUM_QUADS" title="Core Texture Quads" name="Texture requests" description="The number of quad-width texture operations processed by the texture unit." units="quads" />
+        <event offset="228" counter="TEX_DFCH_NUM_PASSES" title="Core Texture Quads" name="Texture issues" description="The number of quad-width filtering passes." units="issues" />
+        <event offset="229" counter="TEX_DFCH_NUM_PASSES_MISS" title="Core Texture Quads" name="Descriptor misses" description="The number of quad-width filtering passes that miss in the resource or sampler descriptor cache." units="requests" />
+        <event offset="230" counter="TEX_DFCH_NUM_PASSES_MIP_MAP" title="Core Texture Quads" name="Mipmapped texture issues" description="The number of quad-width filtering passes that use a mipmapped texture." units="issues" />
+        <event offset="231" counter="TEX_TIDX_NUM_SPLIT_MIP_MAP" title="Core Texture Quads" name="Trilinear filtered issues" description="The number of quad-width filtering passes that use a trilinear filter." units="issues" />
+        <event offset="232" counter="TEX_TFCH_NUM_LINES_FETCHED" title="Core Texture Line Fetches" name="Line fetches" description="The number of texture line fetches from the L2 cache." units="issues" />
+        <event offset="233" counter="TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED" title="Core Texture Line Fetches" name="Compressed line fetches" description="The number of texture line fetches from the L2 cache that are block compressed textures." units="issues" />
+        <event offset="234" counter="TEX_TFCH_NUM_OPERATIONS" title="Core Texture Cycles" name="Cache lookups" description="The number of texture cache lookup cycles." units="requests" />
+        <event offset="235" counter="TEX_FILT_NUM_OPERATIONS" title="Core Texture Cycles" name="Texturing active" description="The number of texture filtering issue cycles." units="cycles" />
+        <event offset="236" counter="LS_MEM_READ_FULL" title="Core Load/Store Cycles" name="Full read cycles" description="The number of full-width load/store cache reads." units="cycles" />
+        <event offset="237" counter="LS_MEM_READ_SHORT" title="Core Load/Store Cycles" name="Partial read cycles" description="The number of partial-width load/store cache reads." units="cycles" />
+        <event offset="238" counter="LS_MEM_WRITE_FULL" title="Core Load/Store Cycles" name="Full write cycles" description="The number of full-width load/store cache writes." units="cycles" />
+        <event offset="239" counter="LS_MEM_WRITE_SHORT" title="Core Load/Store Cycles" name="Partial write cycles" description="The number of partial-width load/store cache writes." units="cycles" />
+        <event offset="240" counter="LS_MEM_ATOMIC" title="Core Load/Store Cycles" name="Atomic access cycles" description="The number of load/store atomic accesses." units="cycles" />
+        <event offset="241" counter="VARY_INSTR" title="Core Varying Requests" name="Interpolation requests" description="The number of warp-width interpolation operations processed by the varying unit." units="instructions" />
+        <event offset="242" counter="VARY_SLOT_32" title="Core Varying Cycles" name="32-bit interpolation active" description="The number of 32-bit interpolation cycles processed by the varying unit." units="cycles" />
+        <event offset="243" counter="VARY_SLOT_16" title="Core Varying Cycles" name="16-bit interpolation active" description="The number of 16-bit interpolation cycles processed by the varying unit." units="cycles" />
+        <event offset="244" advanced="yes" counter="ATTR_INSTR" title="Core Attribute Requests" name="Attribute requests" description="The number of instructions executed by the attribute unit." units="instructions" />
+        <event offset="245" advanced="yes" counter="ARITH_INSTR_FP_MUL" title="Core EE Instructions" name="Multiplier instructions" description="The number of instructions where the workload uses floating-point multiplier hardware." units="instructions" />
+        <event offset="246" counter="BEATS_RD_FTC" title="Core L2 Reads" name="Fragment L2 read beats" description="The number of read beats received by the fixed-function fragment front-end." units="beats" />
+        <event offset="247" counter="BEATS_RD_FTC_EXT" title="Core L2 Reads" name="Fragment external read beats" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="248" counter="BEATS_RD_LSC" title="Core L2 Reads" name="Load/store L2 read beats" description="The number of read beats received by the load/store unit." units="beats" />
+        <event offset="249" counter="BEATS_RD_LSC_EXT" title="Core L2 Reads" name="Load/store external read beats" description="The number of read beats received by the load/store unit that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="250" counter="BEATS_RD_TEX" title="Core L2 Reads" name="Texture L2 read beats" description="The number of read beats received by the texture unit." units="beats" />
+        <event offset="251" counter="BEATS_RD_TEX_EXT" title="Core L2 Reads" name="Texture external read beats" description="The number of read beats received by the texture unit that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="252" advanced="yes" counter="BEATS_RD_OTHER" title="Core L2 Reads" name="Other L2 read beats" description="The number of read beats received by a unit that is not specifically identified." units="beats" />
+        <event offset="255" counter="BEATS_WR_LSC_WB" title="Core Writes" name="Load/store writeback write beats" description="The number of write beats by the load/store unit that are due to writeback." units="beats" />
+        <event offset="254" counter="BEATS_WR_TIB" title="Core Writes" name="Tile buffer write beats" description="The number of write beats sent by the tile buffer writeback unit." units="beats" />
+        <event offset="253" counter="BEATS_WR_LSC_OTHER" title="Core Writes" name="Load/store other write beats" description="The number of write beats by the load/store unit that are due to any reason other than writeback." units="beats" />
+    </category>
+    <category name="Memory System" per_cpu="no">
+        <event offset="132" advanced="yes" counter="MMU_REQUESTS" title="MMU Stage 1 Translations" name="MMU lookups" description="The number of main MMU address translations performed." units="requests" />
+        <event offset="144" advanced="yes" counter="L2_RD_MSG_IN" title="L2 Cache Requests" name="Read requests" description="The number of L2 cache read requests from internal masters." units="requests" />
+        <event offset="145" advanced="yes" counter="L2_RD_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Read stall cycles" description="The number of cycles where L2 cache read requests from internal masters are stalled." units="cycles" />
+        <event offset="146" advanced="yes" counter="L2_WR_MSG_IN" title="L2 Cache Requests" name="Write requests" description="The number of L2 cache write requests from internal masters." units="requests" />
+        <event offset="147" advanced="yes" counter="L2_WR_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Write stall cycles" description="The number of cycles where L2 cache write requests from internal masters are stalled." units="cycles" />
+        <event offset="148" advanced="yes" counter="L2_SNP_MSG_IN" title="L2 Cache Requests" name="Snoop requests" description="The number of L2 snoop requests from internal masters." units="requests" />
+        <event offset="149" advanced="yes" counter="L2_SNP_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Snoop stall cycles" description="The number of cycles where L2 cache snoop requests from internal masters are stalled." units="cycles" />
+        <event offset="150" advanced="yes" counter="L2_RD_MSG_OUT" title="L2 Cache Requests" name="L1 read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal master." units="requests" />
+        <event offset="151" advanced="yes" counter="L2_RD_MSG_OUT_STALL" title="L2 Cache Stall Cycles" name="L1 read stall cycles" description="The number of cycles where L1 cache read requests sent by the L2 cache to an internal master are stalled." units="cycles" />
+        <event offset="152" advanced="yes" counter="L2_WR_MSG_OUT" title="L2 Cache Requests" name="L1 write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal master." units="requests" />
+        <event offset="153" counter="L2_ANY_LOOKUP" title="L2 Cache Lookups" name="Any lookup" description="The number of L2 cache lookups performed." units="requests" />
+        <event offset="154" counter="L2_READ_LOOKUP" title="L2 Cache Lookups" name="Read lookup" description="The number of L2 cache read lookups performed." units="requests" />
+        <event offset="155" counter="L2_WRITE_LOOKUP" title="L2 Cache Lookups" name="Write lookup" description="The number of L2 cache write lookups performed." units="requests" />
+        <event offset="156" advanced="yes" counter="L2_EXT_SNOOP_LOOKUP" title="L2 Cache Lookups" name="External snoop lookups" description="The number of coherency snoop lookups performed that were triggered by an external master." units="requests" />
+        <event offset="157" counter="L2_EXT_READ" title="External Bus Accesses" name="Read transaction" description="The number of external read transactions." units="transactions" />
+        <event offset="158" advanced="yes" counter="L2_EXT_READ_NOSNP" title="External Bus Accesses" name="ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" />
+        <event offset="159" advanced="yes" counter="L2_EXT_READ_UNIQUE" title="External Bus Accesses" name="ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" />
+        <event offset="160" counter="L2_EXT_READ_BEATS" title="External Bus Beats" name="Read beat" description="The number of external bus data read cycles." units="beats" />
+        <event offset="161" counter="L2_EXT_AR_STALL" title="External Bus Stalls" name="Read stall cycles" description="The number of cycles where a read is stalled waiting for the external bus." units="cycles" />
+        <event offset="162" counter="L2_EXT_AR_CNT_Q1" title="External Bus Outstanding Reads" name="0-25% outstanding" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" />
+        <event offset="163" counter="L2_EXT_AR_CNT_Q2" title="External Bus Outstanding Reads" name="25-50% outstanding" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" />
+        <event offset="164" counter="L2_EXT_AR_CNT_Q3" title="External Bus Outstanding Reads" name="50-75% outstanding" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" />
+        <event offset="165" counter="L2_EXT_RRESP_0_127" title="External Bus Read Latency" name="0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" />
+        <event offset="166" counter="L2_EXT_RRESP_128_191" title="External Bus Read Latency" name="128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" />
+        <event offset="167" counter="L2_EXT_RRESP_192_255" title="External Bus Read Latency" name="192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" />
+        <event offset="168" counter="L2_EXT_RRESP_256_319" title="External Bus Read Latency" name="256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" />
+        <event offset="169" counter="L2_EXT_RRESP_320_383" title="External Bus Read Latency" name="320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" />
+        <event offset="170" counter="L2_EXT_WRITE" title="External Bus Accesses" name="Write transaction" description="The number of external write transactions." units="transactions" />
+        <event offset="171" advanced="yes" counter="L2_EXT_WRITE_NOSNP_FULL" title="External Bus Accesses" name="WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" />
+        <event offset="172" advanced="yes" counter="L2_EXT_WRITE_NOSNP_PTL" title="External Bus Accesses" name="WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" />
+        <event offset="173" advanced="yes" counter="L2_EXT_WRITE_SNP_FULL" title="External Bus Accesses" name="WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" />
+        <event offset="174" advanced="yes" counter="L2_EXT_WRITE_SNP_PTL" title="External Bus Accesses" name="WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" />
+        <event offset="175" counter="L2_EXT_WRITE_BEATS" title="External Bus Beats" name="Write beat" description="The number of external bus data write cycles." units="beats" />
+        <event offset="176" counter="L2_EXT_W_STALL" title="External Bus Stalls" name="Write stall cycles" description="The number of cycles where a write is stalled waiting for the external bus." units="cycles" />
+        <event offset="177" counter="L2_EXT_AW_CNT_Q1" title="External Bus Outstanding Writes" name="0-25% outstanding" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" />
+        <event offset="178" counter="L2_EXT_AW_CNT_Q2" title="External Bus Outstanding Writes" name="25-50% outstanding" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" />
+        <event offset="179" counter="L2_EXT_AW_CNT_Q3" title="External Bus Outstanding Writes" name="50-75% outstanding" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" />
+        <event offset="180" advanced="yes" counter="L2_EXT_SNOOP" title="External Bus Accesses" name="Snoop transactions" description="The number of coherency snoops triggered by external masters." units="transactions" />
+        <event offset="181" advanced="yes" counter="L2_EXT_SNOOP_STALL" title="External Bus Stalls" name="Snoop stall cycles" description="The number of cycles where a coherency snoop triggered by an external master is stalled." units="cycles" />
+    </category>
+</metrics>
\ No newline at end of file
diff --git a/src/panfrost/perf/G51.xml b/src/panfrost/perf/G51.xml
new file mode 100644 (file)
index 0000000..3bb25c7
--- /dev/null
@@ -0,0 +1,177 @@
+<!--
+Copyright © 2017-2020 ARM Limited.
+Copyright © 2021 Collabora, Ltd.
+Author: Antonio Caggiano <antonio.caggiano@collabora.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+<metrics id="TSIx">
+    <category name="Job Manager" per_cpu="no">
+        <event offset="6" counter="GPU_ACTIVE" title="GPU Cycles" name="GPU active" description="The number of cycles where the GPU has a workload of any type queued for processing." units="cycles" />
+        <event offset="7" advanced="yes" counter="IRQ_ACTIVE" title="GPU Cycles" name="Interrupt active" description="The number of cycles where the GPU has a pending interrupt." units="cycles" />
+        <event offset="8" advanced="yes" counter="JS0_JOBS" title="GPU Jobs" name="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs" />
+        <event offset="9" counter="JS0_TASKS" title="GPU Tasks" name="Fragment tasks" description="The number of 32x32 pixel tasks processed by the GPU fragment queue." units="tasks" />
+        <event offset="10" counter="JS0_ACTIVE" title="GPU Cycles" name="Fragment queue active" description="The number of cycles where work is queued for processing in the GPU fragment queue." units="cycles" />
+        <event offset="12" advanced="yes" counter="JS0_WAIT_READ" title="GPU Wait Cycles" name="Fragment descriptor read cycles" description="The number of cycles where queued fragment work is waiting for a descriptor load." units="cycles" />
+        <event offset="13" advanced="yes" counter="JS0_WAIT_ISSUE" title="GPU Wait Cycles" name="Fragment job issue cycles" description="The number of cycles where queued fragment work is waiting for an available processor." units="cycles" />
+        <event offset="14" advanced="yes" counter="JS0_WAIT_DEPEND" title="GPU Wait Cycles" name="Fragment job dependency cycles" description="The number of cycles where queued fragment work is waiting for dependent work to complete." units="cycles" />
+        <event offset="15" advanced="yes" counter="JS0_WAIT_FINISH" title="GPU Wait Cycles" name="Fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued fragment work to complete." units="cycles" />
+        <event offset="16" advanced="yes" counter="JS1_JOBS" title="GPU Jobs" name="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs" />
+        <event offset="17" advanced="yes" counter="JS1_TASKS" title="GPU Tasks" name="Non-fragment tasks" description="The number of tasks processed by the GPU non-fragment queue." units="tasks" />
+        <event offset="18" counter="JS1_ACTIVE" title="GPU Cycles" name="Non-fragment queue active" description="The number of cycles where work is queued in the GPU non-fragment queue." units="cycles" />
+        <event offset="20" advanced="yes" counter="JS1_WAIT_READ" title="GPU Wait Cycles" name="Non-fragment descriptor read cycles" description="The number of cycles where queued non-fragment work is waiting for a descriptor load." units="cycles" />
+        <event offset="21" advanced="yes" counter="JS1_WAIT_ISSUE" title="GPU Wait Cycles" name="Non-fragment job issue cycles" description="The number of cycles where queued non-fragment work is waiting for an available processor." units="cycles" />
+        <event offset="22" advanced="yes" counter="JS1_WAIT_DEPEND" title="GPU Wait Cycles" name="Non-fragment job dependency cycles" description="The number of cycles where queued non-fragment work is waiting for dependent work to complete." units="cycles" />
+        <event offset="23" advanced="yes" counter="JS1_WAIT_FINISH" title="GPU Wait Cycles" name="Non-fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued non-fragment work to complete." units="cycles" />
+        <event offset="24" advanced="yes" counter="JS2_JOBS" title="GPU Jobs" name="Reserved jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs" />
+        <event offset="25" advanced="yes" counter="JS2_TASKS" title="GPU Tasks" name="Reserved tasks" description="The number of tasks processed by the GPU reserved queue." units="tasks" />
+        <event offset="26" advanced="yes" counter="JS2_ACTIVE" title="GPU Cycles" name="Reserved queue active" description="The number of cycles where work is queued in the GPU reserved queue." units="cycles" />
+        <event offset="28" advanced="yes" counter="JS2_WAIT_READ" title="GPU Wait Cycles" name="Reserved descriptor read cycles" description="The number of cycles where queued reserved work is waiting for a descriptor load." units="cycles" />
+        <event offset="29" advanced="yes" counter="JS2_WAIT_ISSUE" title="GPU Wait Cycles" name="Reserved job issue cycles" description="The number of cycles where queued reserved work is waiting for an available processor." units="cycles" />
+        <event offset="30" advanced="yes" counter="JS2_WAIT_DEPEND" title="GPU Wait Cycles" name="Reserved job dependency cycles" description="The number of cycles where queued reserved work is waiting for dependent work to complete." units="cycles" />
+        <event offset="31" advanced="yes" counter="JS2_WAIT_FINISH" title="GPU Wait Cycles" name="Reserved job finish cycles" description="The number of cycles where the GPU is waiting for issued reserved work to complete." units="cycles" />
+    </category>
+    <category name="Tiler" per_cpu="no">
+        <event offset="68" counter="TILER_ACTIVE" title="GPU Cycles" name="Tiler active" description="The number of cycles where the tiler has a workload queued for processing." units="cycles" />
+        <event offset="70" counter="TRIANGLES" title="Input Primitives" name="Triangle primitives" description="The number of input triangle primitives." units="primitives" />
+        <event offset="71" counter="LINES" title="Input Primitives" name="Line primitives" description="The number of input line primitives." units="primitives" />
+        <event offset="72" counter="POINTS" title="Input Primitives" name="Point primitives" description="The number of input point primitives." units="primitives" />
+        <event offset="73" counter="FRONT_FACING" title="Visible Primitives" name="Front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" />
+        <event offset="74" counter="BACK_FACING" title="Visible Primitives" name="Back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" />
+        <event offset="75" counter="PRIM_VISIBLE" title="Primitive Culling" name="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" />
+        <event offset="76" counter="PRIM_CULLED" title="Primitive Culling" name="Facing and XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives" />
+        <event offset="77" counter="PRIM_CLIPPED" title="Primitive Culling" name="Z plane test culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives" />
+        <event offset="78" counter="PRIM_SAT_CULLED" title="Primitive Culling" name="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" />
+        <event offset="81" advanced="yes" counter="BUS_READ" title="Tiler L2 Accesses" name="Read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" />
+        <event offset="83" advanced="yes" counter="BUS_WRITE" title="Tiler L2 Accesses" name="Write beats" description="The number of internal bus data write cycles made by the tiler." units="beats" />
+        <event offset="85" counter="IDVS_POS_SHAD_REQ" title="Tiler Shading Requests" name="Position shading requests" description="The number of position shading requests in the IDVS flow." units="requests" />
+        <event offset="87" advanced="yes" counter="IDVS_POS_SHAD_STALL" title="Tiler Cycles" name="Position shading stall cycles" description="The number of cycles where the tiler has a stalled position shading request." units="cycles" />
+        <event offset="88" advanced="yes" counter="IDVS_POS_FIFO_FULL" title="Tiler Cycles" name="Position FIFO full cycles" description="The number of cycles where the tiler has a stalled position shading buffer." units="cycles" />
+        <event offset="90" advanced="yes" counter="VCACHE_HIT" title="Tiler Vertex Cache" name="Position cache hits" description="The number of position lookups that result in a hit in the vertex cache." units="requests" />
+        <event offset="91" advanced="yes" counter="VCACHE_MISS" title="Tiler Vertex Cache" name="Position cache misses" description="The number of position lookups that miss in the vertex cache." units="requests" />
+        <event offset="95" advanced="yes" counter="VFETCH_STALL" title="Tiler Cycles" name="Primitive assembly busy stall cycles" description="The number of cycles where the tiler is stalled waiting for primitive assembly." units="cycles" />
+        <event offset="98" advanced="yes" counter="IDVS_VBU_HIT" title="Tiler Vertex Cache" name="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" />
+        <event offset="99" advanced="yes" counter="IDVS_VBU_MISS" title="Tiler Vertex Cache" name="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests" />
+        <event offset="101" counter="IDVS_VAR_SHAD_REQ" title="Tiler Shading Requests" name="Varying shading requests" description="The number of varying shading requests in the IDVS flow." units="requests" />
+        <event offset="102" advanced="yes" counter="IDVS_VAR_SHAD_STALL" title="Tiler Cycles" name="Varying shading stall cycles" description="The number of cycles where the tiler has a stalled varying shading request." units="cycles" />
+        <event offset="118" advanced="yes" counter="WRBUF_NO_AXI_ID_STALL" title="Tiler Cycles" name="Write buffer transaction stall cycles" description="The number of cycles where the tiler write buffer can not send data because it has no available write IDs." units="cycles" />
+        <event offset="119" advanced="yes" counter="WRBUF_AXI_STALL" title="Tiler Cycles" name="Write buffer write stall cycles" description="The number of cycles where the tiler write buffer can not send data because the bus is not ready." units="cycles" />
+    </category>
+    <category name="Shader Core" per_cpu="no">
+        <event offset="196" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment active" description="The number of cycles where the shader core is processing a fragment workload." units="cycles" />
+        <event offset="197" advanced="yes" counter="FRAG_PRIMITIVES" title="Core Primitives" name="Read primitives" description="The number of primitives read from the tile list by the fragment front-end." units="primitives" />
+        <event offset="198" counter="FRAG_PRIM_RAST" title="Core Primitives" name="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" />
+        <event offset="199" counter="FRAG_FPK_ACTIVE" title="Core Cycles" name="Fragment FPKB active" description="The number of cycles where at least one quad is present in the pre-pipe quad queue." units="cycles" />
+        <event offset="201" counter="FRAG_WARPS" title="Core Warps" name="Fragment warps" description="The number of fragment warps created." units="warps" />
+        <event offset="202" counter="FRAG_PARTIAL_WARPS" title="Core Warps" name="Partial fragment warps" description="The number of fragment warps containing helper threads that do not correspond to a hit sample point." units="warps" />
+        <event offset="203" counter="FRAG_QUADS_RAST" title="Core Quads" name="Rasterized quads" description="The number of quads generated by the rasterization phase." units="quads" />
+        <event offset="204" counter="FRAG_QUADS_EZS_TEST" title="Core Quads" name="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" />
+        <event offset="205" counter="FRAG_QUADS_EZS_UPDATE" title="Core Quads" name="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" />
+        <event offset="206" counter="FRAG_QUADS_EZS_KILL" title="Core Quads" name="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" />
+        <event offset="207" counter="FRAG_LZS_TEST" title="Core Quads" name="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" />
+        <event offset="208" counter="FRAG_LZS_KILL" title="Core Quads" name="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" />
+        <event offset="210" counter="FRAG_PTILES" title="Core Tiles" name="Tiles" description="The number of tiles processed by the shader core." units="tiles" />
+        <event offset="211" counter="FRAG_TRANS_ELIM" title="Core Tiles" name="Constant tiles killed" description="The number of tiles killed by transaction elimination." units="tiles" />
+        <event offset="212" counter="QUAD_FPK_KILLER" title="Core Quads" name="FPK occluder quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" />
+        <event offset="214" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Non-fragment active" description="The number of cycles where the shader core is processing some non-fragment workload." units="cycles" />
+        <event offset="215" advanced="yes" counter="COMPUTE_TASKS" title="Core Tasks" name="Non-fragment tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks" />
+        <event offset="216" counter="COMPUTE_WARPS" title="Core Warps" name="Non-fragment warps" description="The number of non-fragment warps created." units="warps" />
+        <event offset="217" advanced="yes" counter="COMPUTE_STARVING" title="Core Starvation Cycles" name="Non-fragment starvation cycles" description="The number of cycles where the shader core is processing a non-fragment workload and there are no new threads available for execution." units="cycles" />
+        <event offset="218" counter="EXEC_CORE_ACTIVE" title="Core Cycles" name="Execution core active" description="The number of cycles where the shader core is processing at least one warp." units="cycles" />
+        <event offset="219" advanced="yes" counter="EXEC_ACTIVE" title="Core Cycles" name="Execution engine active" description="The number of cycles where the execution engine unit is processing at least one thread." units="cycles" />
+        <event offset="220" counter="EXEC_INSTR_COUNT" title="Core EE Instructions" name="Executed instructions" description="The number of instructions executed per warp." units="instructions" />
+        <event offset="221" counter="EXEC_INSTR_DIVERGED" title="Core EE Instructions" name="Diverged instructions" description="The number of instructions executed per warp, that have control flow divergence." units="instructions" />
+        <event offset="222" advanced="yes" counter="EXEC_INSTR_STARVING" title="Core Starvation Cycles" name="Execution engine starvation cycles" description="The number of cycles where no new threads are available for execution." units="cycles" />
+        <event offset="223" advanced="yes" counter="ARITH_INSTR_SINGLE_FMA" title="Core EE Instructions" name="Arithmetic instructions" description="The number of instructions where the workload is a single FMA pipe arithmetic operation." units="instructions" />
+        <event offset="224" advanced="yes" counter="ARITH_INSTR_DOUBLE" title="Core EE Instructions" name="Dual Arithmetic instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe arithmetic operation." units="instructions" />
+        <event offset="225" advanced="yes" counter="ARITH_INSTR_MSG" title="Core EE Instructions" name="Arithmetic + Message instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe message operation." units="instructions" />
+        <event offset="226" advanced="yes" counter="ARITH_INSTR_MSG_ONLY" title="Core EE Instructions" name="Message instructions" description="The number of instructions where the workload is a single ADD pipe message operation, with no FMA pipe operation." units="instructions" />
+        <event offset="227" counter="TEX_MSGI_NUM_QUADS" title="Core Texture Quads" name="Texture requests" description="The number of quad-width texture operations processed by the texture unit." units="quads" />
+        <event offset="228" counter="TEX_DFCH_NUM_PASSES" title="Core Texture Quads" name="Texture issues" description="The number of quad-width filtering passes." units="issues" />
+        <event offset="229" counter="TEX_DFCH_NUM_PASSES_MISS" title="Core Texture Quads" name="Descriptor misses" description="The number of quad-width filtering passes that miss in the resource or sampler descriptor cache." units="requests" />
+        <event offset="230" counter="TEX_DFCH_NUM_PASSES_MIP_MAP" title="Core Texture Quads" name="Mipmapped texture issues" description="The number of quad-width filtering passes that use a mipmapped texture." units="issues" />
+        <event offset="231" counter="TEX_TIDX_NUM_SPLIT_MIP_MAP" title="Core Texture Quads" name="Trilinear filtered issues" description="The number of quad-width filtering passes that use a trilinear filter." units="issues" />
+        <event offset="232" counter="TEX_TFCH_NUM_LINES_FETCHED" title="Core Texture Line Fetches" name="Line fetches" description="The number of texture line fetches from the L2 cache." units="issues" />
+        <event offset="233" counter="TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED" title="Core Texture Line Fetches" name="Compressed line fetches" description="The number of texture line fetches from the L2 cache that are block compressed textures." units="issues" />
+        <event offset="234" counter="TEX_TFCH_NUM_OPERATIONS" title="Core Texture Cycles" name="Cache lookups" description="The number of texture cache lookup cycles." units="requests" />
+        <event offset="235" counter="TEX_FILT_NUM_OPERATIONS" title="Core Texture Cycles" name="Texturing active" description="The number of texture filtering issue cycles." units="cycles" />
+        <event offset="236" counter="LS_MEM_READ_FULL" title="Core Load/Store Cycles" name="Full read cycles" description="The number of full-width load/store cache reads." units="cycles" />
+        <event offset="237" counter="LS_MEM_READ_SHORT" title="Core Load/Store Cycles" name="Partial read cycles" description="The number of partial-width load/store cache reads." units="cycles" />
+        <event offset="238" counter="LS_MEM_WRITE_FULL" title="Core Load/Store Cycles" name="Full write cycles" description="The number of full-width load/store cache writes." units="cycles" />
+        <event offset="239" counter="LS_MEM_WRITE_SHORT" title="Core Load/Store Cycles" name="Partial write cycles" description="The number of partial-width load/store cache writes." units="cycles" />
+        <event offset="240" counter="LS_MEM_ATOMIC" title="Core Load/Store Cycles" name="Atomic access cycles" description="The number of load/store atomic accesses." units="cycles" />
+        <event offset="241" counter="VARY_INSTR" title="Core Varying Requests" name="Interpolation requests" description="The number of warp-width interpolation operations processed by the varying unit." units="instructions" />
+        <event offset="242" counter="VARY_SLOT_32" title="Core Varying Cycles" name="32-bit interpolation active" description="The number of 32-bit interpolation cycles processed by the varying unit." units="cycles" />
+        <event offset="243" counter="VARY_SLOT_16" title="Core Varying Cycles" name="16-bit interpolation active" description="The number of 16-bit interpolation cycles processed by the varying unit." units="cycles" />
+        <event offset="244" advanced="yes" counter="ATTR_INSTR" title="Core Attribute Requests" name="Attribute requests" description="The number of instructions executed by the attribute unit." units="instructions" />
+        <event offset="245" advanced="yes" counter="ARITH_INSTR_FP_MUL" title="Core EE Instructions" name="Multiplier instructions" description="The number of instructions where the workload uses floating-point multiplier hardware." units="instructions" />
+        <event offset="246" counter="BEATS_RD_FTC" title="Core L2 Reads" name="Fragment L2 read beats" description="The number of read beats received by the fixed-function fragment front-end." units="beats" />
+        <event offset="247" counter="BEATS_RD_FTC_EXT" title="Core L2 Reads" name="Fragment external read beats" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="248" counter="BEATS_RD_LSC" title="Core L2 Reads" name="Load/store L2 read beats" description="The number of read beats received by the load/store unit." units="beats" />
+        <event offset="249" counter="BEATS_RD_LSC_EXT" title="Core L2 Reads" name="Load/store external read beats" description="The number of read beats received by the load/store unit that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="250" counter="BEATS_RD_TEX" title="Core L2 Reads" name="Texture L2 read beats" description="The number of read beats received by the texture unit." units="beats" />
+        <event offset="251" counter="BEATS_RD_TEX_EXT" title="Core L2 Reads" name="Texture external read beats" description="The number of read beats received by the texture unit that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="252" advanced="yes" counter="BEATS_RD_OTHER" title="Core L2 Reads" name="Other L2 read beats" description="The number of read beats received by a unit that is not specifically identified." units="beats" />
+        <event offset="253" counter="BEATS_WR_LSC_OTHER" title="Core Writes" name="Load/store other write beats" description="The number of write beats by the load/store unit that are due to any reason other than writeback." units="beats" />
+        <event offset="254" counter="BEATS_WR_TIB" title="Core Writes" name="Tile buffer write beats" description="The number of write beats sent by the tile buffer writeback unit." units="beats" />
+        <event offset="255" counter="BEATS_WR_LSC_WB" title="Core Writes" name="Load/store writeback write beats" description="The number of write beats by the load/store unit that are due to writeback." units="beats" />
+    </category>
+    <category name="Memory System" per_cpu="no">
+        <event offset="132" advanced="yes" counter="MMU_REQUESTS" title="MMU Stage 1 Translations" name="MMU lookups" description="The number of main MMU address translations performed." units="requests" />
+        <event offset="144" advanced="yes" counter="L2_RD_MSG_IN" title="L2 Cache Requests" name="Read requests" description="The number of L2 cache read requests from internal masters." units="requests" />
+        <event offset="145" advanced="yes" counter="L2_RD_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Read stall cycles" description="The number of cycles where L2 cache read requests from internal masters are stalled." units="cycles" />
+        <event offset="146" advanced="yes" counter="L2_WR_MSG_IN" title="L2 Cache Requests" name="Write requests" description="The number of L2 cache write requests from internal masters." units="requests" />
+        <event offset="147" advanced="yes" counter="L2_WR_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Write stall cycles" description="The number of cycles where L2 cache write requests from internal masters are stalled." units="cycles" />
+        <event offset="148" advanced="yes" counter="L2_SNP_MSG_IN" title="L2 Cache Requests" name="Snoop requests" description="The number of L2 snoop requests from internal masters." units="requests" />
+        <event offset="149" advanced="yes" counter="L2_SNP_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Snoop stall cycles" description="The number of cycles where L2 cache snoop requests from internal masters are stalled." units="cycles" />
+        <event offset="150" advanced="yes" counter="L2_RD_MSG_OUT" title="L2 Cache Requests" name="L1 read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal master." units="requests" />
+        <event offset="151" advanced="yes" counter="L2_RD_MSG_OUT_STALL" title="L2 Cache Stall Cycles" name="L1 read stall cycles" description="The number of cycles where L1 cache read requests sent by the L2 cache to an internal master are stalled." units="cycles" />
+        <event offset="152" advanced="yes" counter="L2_WR_MSG_OUT" title="L2 Cache Requests" name="L1 write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal master." units="requests" />
+        <event offset="153" counter="L2_ANY_LOOKUP" title="L2 Cache Lookups" name="Any lookup" description="The number of L2 cache lookups performed." units="requests" />
+        <event offset="154" counter="L2_READ_LOOKUP" title="L2 Cache Lookups" name="Read lookup" description="The number of L2 cache read lookups performed." units="requests" />
+        <event offset="155" counter="L2_WRITE_LOOKUP" title="L2 Cache Lookups" name="Write lookup" description="The number of L2 cache write lookups performed." units="requests" />
+        <event offset="156" advanced="yes" counter="L2_EXT_SNOOP_LOOKUP" title="L2 Cache Lookups" name="External snoop lookups" description="The number of coherency snoop lookups performed that were triggered by an external master." units="requests" />
+        <event offset="157" counter="L2_EXT_READ" title="External Bus Accesses" name="Read transaction" description="The number of external read transactions." units="transactions" />
+        <event offset="158" advanced="yes" counter="L2_EXT_READ_NOSNP" title="External Bus Accesses" name="ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" />
+        <event offset="159" advanced="yes" counter="L2_EXT_READ_UNIQUE" title="External Bus Accesses" name="ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" />
+        <event offset="160" counter="L2_EXT_READ_BEATS" title="External Bus Beats" name="Read beat" description="The number of external bus data read cycles." units="beats" />
+        <event offset="161" counter="L2_EXT_AR_STALL" title="External Bus Stalls" name="Read stall cycles" description="The number of cycles where a read is stalled waiting for the external bus." units="cycles" />
+        <event offset="162" counter="L2_EXT_AR_CNT_Q1" title="External Bus Outstanding Reads" name="0-25% outstanding" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" />
+        <event offset="163" counter="L2_EXT_AR_CNT_Q2" title="External Bus Outstanding Reads" name="25-50% outstanding" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" />
+        <event offset="164" counter="L2_EXT_AR_CNT_Q3" title="External Bus Outstanding Reads" name="50-75% outstanding" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" />
+        <event offset="165" counter="L2_EXT_RRESP_0_127" title="External Bus Read Latency" name="0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" />
+        <event offset="166" counter="L2_EXT_RRESP_128_191" title="External Bus Read Latency" name="128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" />
+        <event offset="167" counter="L2_EXT_RRESP_192_255" title="External Bus Read Latency" name="192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" />
+        <event offset="168" counter="L2_EXT_RRESP_256_319" title="External Bus Read Latency" name="256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" />
+        <event offset="169" counter="L2_EXT_RRESP_320_383" title="External Bus Read Latency" name="320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" />
+        <event offset="170" counter="L2_EXT_WRITE" title="External Bus Accesses" name="Write transaction" description="The number of external write transactions." units="transactions" />
+        <event offset="171" advanced="yes" counter="L2_EXT_WRITE_NOSNP_FULL" title="External Bus Accesses" name="WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" />
+        <event offset="172" advanced="yes" counter="L2_EXT_WRITE_NOSNP_PTL" title="External Bus Accesses" name="WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" />
+        <event offset="173" advanced="yes" counter="L2_EXT_WRITE_SNP_FULL" title="External Bus Accesses" name="WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" />
+        <event offset="174" advanced="yes" counter="L2_EXT_WRITE_SNP_PTL" title="External Bus Accesses" name="WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" />
+        <event offset="175" counter="L2_EXT_WRITE_BEATS" title="External Bus Beats" name="Write beat" description="The number of external bus data write cycles." units="beats" />
+        <event offset="176" counter="L2_EXT_W_STALL" title="External Bus Stalls" name="Write stall cycles" description="The number of cycles where a write is stalled waiting for the external bus." units="cycles" />
+        <event offset="177" counter="L2_EXT_AW_CNT_Q1" title="External Bus Outstanding Writes" name="0-25% outstanding" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" />
+        <event offset="178" counter="L2_EXT_AW_CNT_Q2" title="External Bus Outstanding Writes" name="25-50% outstanding" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" />
+        <event offset="179" counter="L2_EXT_AW_CNT_Q3" title="External Bus Outstanding Writes" name="50-75% outstanding" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" />
+        <event offset="180" advanced="yes" counter="L2_EXT_SNOOP" title="External Bus Accesses" name="Snoop transactions" description="The number of coherency snoops triggered by external masters." units="transactions" />
+        <event offset="181" advanced="yes" counter="L2_EXT_SNOOP_STALL" title="External Bus Stalls" name="Snoop stall cycles" description="The number of cycles where a coherency snoop triggered by an external master is stalled." units="cycles" />
+    </category>
+</metrics>
\ No newline at end of file
diff --git a/src/panfrost/perf/G52.xml b/src/panfrost/perf/G52.xml
new file mode 100644 (file)
index 0000000..02b21da
--- /dev/null
@@ -0,0 +1,179 @@
+<!--
+Copyright © 2017-2020 ARM Limited.
+Copyright © 2021 Collabora, Ltd.
+Author: Antonio Caggiano <antonio.caggiano@collabora.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+<metrics id="TGOx">
+    <category name="Job Manager" per_cpu="no">
+        <event offset="6" counter="GPU_ACTIVE" title="GPU Cycles" name="GPU active" description="The number of cycles where the GPU has a workload of any type queued for processing." units="cycles" />
+        <event offset="7" advanced="yes" counter="IRQ_ACTIVE" title="GPU Cycles" name="Interrupt active" description="The number of cycles where the GPU has a pending interrupt." units="cycles" />
+        <event offset="8" advanced="yes" counter="JS0_JOBS" title="GPU Jobs" name="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs" />
+        <event offset="9" counter="JS0_TASKS" title="GPU Tasks" name="Fragment tasks" description="The number of 32x32 pixel tasks processed by the GPU fragment queue." units="tasks" />
+        <event offset="10" counter="JS0_ACTIVE" title="GPU Cycles" name="Fragment queue active" description="The number of cycles where work is queued for processing in the GPU fragment queue." units="cycles" />
+        <event offset="12" advanced="yes" counter="JS0_WAIT_READ" title="GPU Wait Cycles" name="Fragment descriptor read cycles" description="The number of cycles where queued fragment work is waiting for a descriptor load." units="cycles" />
+        <event offset="13" advanced="yes" counter="JS0_WAIT_ISSUE" title="GPU Wait Cycles" name="Fragment job issue cycles" description="The number of cycles where queued fragment work is waiting for an available processor." units="cycles" />
+        <event offset="14" advanced="yes" counter="JS0_WAIT_DEPEND" title="GPU Wait Cycles" name="Fragment job dependency cycles" description="The number of cycles where queued fragment work is waiting for dependent work to complete." units="cycles" />
+        <event offset="15" advanced="yes" counter="JS0_WAIT_FINISH" title="GPU Wait Cycles" name="Fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued fragment work to complete." units="cycles" />
+        <event offset="16" advanced="yes" counter="JS1_JOBS" title="GPU Jobs" name="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs" />
+        <event offset="17" advanced="yes" counter="JS1_TASKS" title="GPU Tasks" name="Non-fragment tasks" description="The number of tasks processed by the GPU non-fragment queue." units="tasks" />
+        <event offset="18" counter="JS1_ACTIVE" title="GPU Cycles" name="Non-fragment queue active" description="The number of cycles where work is queued in the GPU non-fragment queue." units="cycles" />
+        <event offset="20" advanced="yes" counter="JS1_WAIT_READ" title="GPU Wait Cycles" name="Non-fragment descriptor read cycles" description="The number of cycles where queued non-fragment work is waiting for a descriptor load." units="cycles" />
+        <event offset="21" advanced="yes" counter="JS1_WAIT_ISSUE" title="GPU Wait Cycles" name="Non-fragment job issue cycles" description="The number of cycles where queued non-fragment work is waiting for an available processor." units="cycles" />
+        <event offset="22" advanced="yes" counter="JS1_WAIT_DEPEND" title="GPU Wait Cycles" name="Non-fragment job dependency cycles" description="The number of cycles where queued non-fragment work is waiting for dependent work to complete." units="cycles" />
+        <event offset="23" advanced="yes" counter="JS1_WAIT_FINISH" title="GPU Wait Cycles" name="Non-fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued non-fragment work to complete." units="cycles" />
+        <event offset="24" advanced="yes" counter="JS2_JOBS" title="GPU Jobs" name="Reserved jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs" />
+        <event offset="25" advanced="yes" counter="JS2_TASKS" title="GPU Tasks" name="Reserved tasks" description="The number of tasks processed by the GPU reserved queue." units="tasks" />
+        <event offset="26" advanced="yes" counter="JS2_ACTIVE" title="GPU Cycles" name="Reserved queue active" description="The number of cycles where work is queued in the GPU reserved queue." units="cycles" />
+        <event offset="28" advanced="yes" counter="JS2_WAIT_READ" title="GPU Wait Cycles" name="Reserved descriptor read cycles" description="The number of cycles where queued reserved work is waiting for a descriptor load." units="cycles" />
+        <event offset="29" advanced="yes" counter="JS2_WAIT_ISSUE" title="GPU Wait Cycles" name="Reserved job issue cycles" description="The number of cycles where queued reserved work is waiting for an available processor." units="cycles" />
+        <event offset="30" advanced="yes" counter="JS2_WAIT_DEPEND" title="GPU Wait Cycles" name="Reserved job dependency cycles" description="The number of cycles where queued reserved work is waiting for dependent work to complete." units="cycles" />
+        <event offset="31" advanced="yes" counter="JS2_WAIT_FINISH" title="GPU Wait Cycles" name="Reserved job finish cycles" description="The number of cycles where the GPU is waiting for issued reserved work to complete." units="cycles" />
+    </category>
+    <category name="Tiler" per_cpu="no">
+        <event offset="68" counter="TILER_ACTIVE" title="GPU Cycles" name="Tiler active" description="The number of cycles where the tiler has a workload queued for processing." units="cycles" />
+        <event offset="70" counter="TRIANGLES" title="Input Primitives" name="Triangle primitives" description="The number of input triangle primitives." units="primitives" />
+        <event offset="71" counter="LINES" title="Input Primitives" name="Line primitives" description="The number of input line primitives." units="primitives" />
+        <event offset="72" counter="POINTS" title="Input Primitives" name="Point primitives" description="The number of input point primitives." units="primitives" />
+        <event offset="73" counter="FRONT_FACING" title="Visible Primitives" name="Front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" />
+        <event offset="74" counter="BACK_FACING" title="Visible Primitives" name="Back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" />
+        <event offset="75" counter="PRIM_VISIBLE" title="Primitive Culling" name="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" />
+        <event offset="76" counter="PRIM_CULLED" title="Primitive Culling" name="Facing and XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives" />
+        <event offset="77" counter="PRIM_CLIPPED" title="Primitive Culling" name="Z plane test culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives" />
+        <event offset="78" counter="PRIM_SAT_CULLED" title="Primitive Culling" name="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" />
+        <event offset="81" advanced="yes" counter="BUS_READ" title="Tiler L2 Accesses" name="Read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" />
+        <event offset="83" advanced="yes" counter="BUS_WRITE" title="Tiler L2 Accesses" name="Write beats" description="The number of internal bus data write cycles made by the tiler." units="beats" />
+        <event offset="85" counter="IDVS_POS_SHAD_REQ" title="Tiler Shading Requests" name="Position shading requests" description="The number of position shading requests in the IDVS flow." units="requests" />
+        <event offset="87" advanced="yes" counter="IDVS_POS_SHAD_STALL" title="Tiler Cycles" name="Position shading stall cycles" description="The number of cycles where the tiler has a stalled position shading request." units="cycles" />
+        <event offset="88" advanced="yes" counter="IDVS_POS_FIFO_FULL" title="Tiler Cycles" name="Position FIFO full cycles" description="The number of cycles where the tiler has a stalled position shading buffer." units="cycles" />
+        <event offset="90" advanced="yes" counter="VCACHE_HIT" title="Tiler Vertex Cache" name="Position cache hits" description="The number of position lookups that result in a hit in the vertex cache." units="requests" />
+        <event offset="91" advanced="yes" counter="VCACHE_MISS" title="Tiler Vertex Cache" name="Position cache misses" description="The number of position lookups that miss in the vertex cache." units="requests" />
+        <event offset="95" advanced="yes" counter="VFETCH_STALL" title="Tiler Cycles" name="Primitive assembly busy stall cycles" description="The number of cycles where the tiler is stalled waiting for primitive assembly." units="cycles" />
+        <event offset="98" advanced="yes" counter="IDVS_VBU_HIT" title="Tiler Vertex Cache" name="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" />
+        <event offset="99" advanced="yes" counter="IDVS_VBU_MISS" title="Tiler Vertex Cache" name="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests" />
+        <event offset="101" counter="IDVS_VAR_SHAD_REQ" title="Tiler Shading Requests" name="Varying shading requests" description="The number of varying shading requests in the IDVS flow." units="requests" />
+        <event offset="102" advanced="yes" counter="IDVS_VAR_SHAD_STALL" title="Tiler Cycles" name="Varying shading stall cycles" description="The number of cycles where the tiler has a stalled varying shading request." units="cycles" />
+        <event offset="118" advanced="yes" counter="WRBUF_NO_AXI_ID_STALL" title="Tiler Cycles" name="Write buffer transaction stall cycles" description="The number of cycles where the tiler write buffer can not send data because it has no available write IDs." units="cycles" />
+        <event offset="119" advanced="yes" counter="WRBUF_AXI_STALL" title="Tiler Cycles" name="Write buffer write stall cycles" description="The number of cycles where the tiler write buffer can not send data because the bus is not ready." units="cycles" />
+    </category>
+    <category name="Shader Core" per_cpu="no">
+        <event offset="196" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment active" description="The number of cycles where the shader core is processing a fragment workload." units="cycles" />
+        <event offset="197" advanced="yes" counter="FRAG_PRIMITIVES" title="Core Primitives" name="Read primitives" description="The number of primitives read from the tile list by the fragment front-end." units="primitives" />
+        <event offset="198" counter="FRAG_PRIM_RAST" title="Core Primitives" name="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" />
+        <event offset="199" counter="FRAG_FPK_ACTIVE" title="Core Cycles" name="Fragment FPKB active" description="The number of cycles where at least one quad is present in the pre-pipe quad queue." units="cycles" />
+        <event offset="201" counter="FRAG_WARPS" title="Core Warps" name="Fragment warps" description="The number of fragment warps created." units="warps" />
+        <event offset="202" counter="FRAG_PARTIAL_WARPS" title="Core Warps" name="Partial fragment warps" description="The number of fragment warps containing helper threads that do not correspond to a hit sample point." units="warps" />
+        <event offset="203" counter="FRAG_QUADS_RAST" title="Core Quads" name="Rasterized quads" description="The number of quads generated by the rasterization phase." units="quads" />
+        <event offset="204" counter="FRAG_QUADS_EZS_TEST" title="Core Quads" name="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" />
+        <event offset="205" counter="FRAG_QUADS_EZS_UPDATE" title="Core Quads" name="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" />
+        <event offset="206" counter="FRAG_QUADS_EZS_KILL" title="Core Quads" name="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" />
+        <event offset="207" counter="FRAG_LZS_TEST" title="Core Quads" name="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" />
+        <event offset="208" counter="FRAG_LZS_KILL" title="Core Quads" name="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" />
+        <event offset="209" counter="WARP_REG_SIZE_64" title="Core Warps" name="All register warps" description="The number of warps that require more than 32 registers." units="warps" />
+        <event offset="210" counter="FRAG_PTILES" title="Core Tiles" name="Tiles" description="The number of tiles processed by the shader core." units="tiles" />
+        <event offset="211" counter="FRAG_TRANS_ELIM" title="Core Tiles" name="Constant tiles killed" description="The number of tiles killed by transaction elimination." units="tiles" />
+        <event offset="212" counter="QUAD_FPK_KILLER" title="Core Quads" name="FPK occluder quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" />
+        <event offset="213" counter="FULL_QUAD_WARPS" title="Core Warps" name="Full quad warps" description="The number of warps that are fully populated with quads." units="warps" />
+        <event offset="214" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Non-fragment active" description="The number of cycles where the shader core is processing some non-fragment workload." units="cycles" />
+        <event offset="215" advanced="yes" counter="COMPUTE_TASKS" title="Core Tasks" name="Non-fragment tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks" />
+        <event offset="216" counter="COMPUTE_WARPS" title="Core Warps" name="Non-fragment warps" description="The number of non-fragment warps created." units="warps" />
+        <event offset="217" advanced="yes" counter="COMPUTE_STARVING" title="Core Starvation Cycles" name="Non-fragment starvation cycles" description="The number of cycles where the shader core is processing a non-fragment workload and there are no new threads available for execution." units="cycles" />
+        <event offset="218" counter="EXEC_CORE_ACTIVE" title="Core Cycles" name="Execution core active" description="The number of cycles where the shader core is processing at least one warp." units="cycles" />
+        <event offset="219" advanced="yes" counter="EXEC_ACTIVE" title="Core Cycles" name="Execution engine active" description="The number of cycles where the execution engine unit is processing at least one thread." units="cycles" />
+        <event offset="220" counter="EXEC_INSTR_COUNT" title="Core EE Instructions" name="Executed instructions" description="The number of instructions executed per warp." units="instructions" />
+        <event offset="221" counter="EXEC_INSTR_DIVERGED" title="Core EE Instructions" name="Diverged instructions" description="The number of instructions executed per warp, that have control flow divergence." units="instructions" />
+        <event offset="222" advanced="yes" counter="EXEC_INSTR_STARVING" title="Core Starvation Cycles" name="Execution engine starvation cycles" description="The number of cycles where no new threads are available for execution." units="cycles" />
+        <event offset="223" advanced="yes" counter="ARITH_INSTR_SINGLE_FMA" title="Core EE Instructions" name="Arithmetic instructions" description="The number of instructions where the workload is a single FMA pipe arithmetic operation." units="instructions" />
+        <event offset="224" advanced="yes" counter="ARITH_INSTR_DOUBLE" title="Core EE Instructions" name="Dual Arithmetic instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe arithmetic operation." units="instructions" />
+        <event offset="225" advanced="yes" counter="ARITH_INSTR_MSG" title="Core EE Instructions" name="Arithmetic + Message instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe message operation." units="instructions" />
+        <event offset="226" advanced="yes" counter="ARITH_INSTR_MSG_ONLY" title="Core EE Instructions" name="Message instructions" description="The number of instructions where the workload is a single ADD pipe message operation, with no FMA pipe operation." units="instructions" />
+        <event offset="227" counter="TEX_MSGI_NUM_QUADS" title="Core Texture Quads" name="Texture requests" description="The number of quad-width texture operations processed by the texture unit." units="quads" />
+        <event offset="228" counter="TEX_DFCH_NUM_PASSES" title="Core Texture Quads" name="Texture issues" description="The number of quad-width filtering passes." units="issues" />
+        <event offset="229" counter="TEX_DFCH_NUM_PASSES_MISS" title="Core Texture Quads" name="Descriptor misses" description="The number of quad-width filtering passes that miss in the resource or sampler descriptor cache." units="requests" />
+        <event offset="230" counter="TEX_DFCH_NUM_PASSES_MIP_MAP" title="Core Texture Quads" name="Mipmapped texture issues" description="The number of quad-width filtering passes that use a mipmapped texture." units="issues" />
+        <event offset="231" counter="TEX_TIDX_NUM_SPLIT_MIP_MAP" title="Core Texture Quads" name="Trilinear filtered issues" description="The number of quad-width filtering passes that use a trilinear filter." units="issues" />
+        <event offset="232" counter="TEX_TFCH_NUM_LINES_FETCHED" title="Core Texture Line Fetches" name="Line fetches" description="The number of texture line fetches from the L2 cache." units="issues" />
+        <event offset="233" counter="TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED" title="Core Texture Line Fetches" name="Compressed line fetches" description="The number of texture line fetches from the L2 cache that are block compressed textures." units="issues" />
+        <event offset="234" counter="TEX_TFCH_NUM_OPERATIONS" title="Core Texture Cycles" name="Cache lookups" description="The number of texture cache lookup cycles." units="requests" />
+        <event offset="235" counter="TEX_FILT_NUM_OPERATIONS" title="Core Texture Cycles" name="Texturing active" description="The number of texture filtering issue cycles." units="cycles" />
+        <event offset="236" counter="LS_MEM_READ_FULL" title="Core Load/Store Cycles" name="Full read cycles" description="The number of full-width load/store cache reads." units="cycles" />
+        <event offset="237" counter="LS_MEM_READ_SHORT" title="Core Load/Store Cycles" name="Partial read cycles" description="The number of partial-width load/store cache reads." units="cycles" />
+        <event offset="238" counter="LS_MEM_WRITE_FULL" title="Core Load/Store Cycles" name="Full write cycles" description="The number of full-width load/store cache writes." units="cycles" />
+        <event offset="239" counter="LS_MEM_WRITE_SHORT" title="Core Load/Store Cycles" name="Partial write cycles" description="The number of partial-width load/store cache writes." units="cycles" />
+        <event offset="240" counter="LS_MEM_ATOMIC" title="Core Load/Store Cycles" name="Atomic access cycles" description="The number of load/store atomic accesses." units="cycles" />
+        <event offset="241" counter="VARY_INSTR" title="Core Varying Requests" name="Interpolation requests" description="The number of warp-width interpolation operations processed by the varying unit." units="instructions" />
+        <event offset="242" counter="VARY_SLOT_32" title="Core Varying Cycles" name="32-bit interpolation active" description="The number of 32-bit interpolation cycles processed by the varying unit." units="cycles" />
+        <event offset="243" counter="VARY_SLOT_16" title="Core Varying Cycles" name="16-bit interpolation active" description="The number of 16-bit interpolation cycles processed by the varying unit." units="cycles" />
+        <event offset="244" advanced="yes" counter="ATTR_INSTR" title="Core Attribute Requests" name="Attribute requests" description="The number of instructions executed by the attribute unit." units="instructions" />
+        <event offset="245" advanced="yes" counter="ARITH_INSTR_FP_MUL" title="Core EE Instructions" name="Multiplier instructions" description="The number of instructions where the workload uses floating-point multiplier hardware." units="instructions" />
+        <event offset="246" counter="BEATS_RD_FTC" title="Core L2 Reads" name="Fragment L2 read beats" description="The number of read beats received by the fixed-function fragment front-end." units="beats" />
+        <event offset="247" counter="BEATS_RD_FTC_EXT" title="Core L2 Reads" name="Fragment external read beats" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="248" counter="BEATS_RD_LSC" title="Core L2 Reads" name="Load/store L2 read beats" description="The number of read beats received by the load/store unit." units="beats" />
+        <event offset="249" counter="BEATS_RD_LSC_EXT" title="Core L2 Reads" name="Load/store external read beats" description="The number of read beats received by the load/store unit that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="250" counter="BEATS_RD_TEX" title="Core L2 Reads" name="Texture L2 read beats" description="The number of read beats received by the texture unit." units="beats" />
+        <event offset="251" counter="BEATS_RD_TEX_EXT" title="Core L2 Reads" name="Texture external read beats" description="The number of read beats received by the texture unit that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="252" advanced="yes" counter="BEATS_RD_OTHER" title="Core L2 Reads" name="Other L2 read beats" description="The number of read beats received by a unit that is not specifically identified." units="beats" />
+        <event offset="253" counter="BEATS_WR_LSC_WB" title="Core Writes" name="Load/store writeback write beats" description="The number of write beats by the load/store unit that are due to writeback." units="beats" />
+        <event offset="254" counter="BEATS_WR_TIB" title="Core Writes" name="Tile buffer write beats" description="The number of write beats sent by the tile buffer writeback unit." units="beats" />
+        <event offset="255" counter="BEATS_WR_LSC_OTHER" title="Core Writes" name="Load/store other write beats" description="The number of write beats by the load/store unit that are due to any reason other than writeback." units="beats" />
+    </category>
+    <category name="Memory System" per_cpu="no">
+        <event offset="132" advanced="yes" counter="MMU_REQUESTS" title="MMU Stage 1 Translations" name="MMU lookups" description="The number of main MMU address translations performed." units="requests" />
+        <event offset="144" advanced="yes" counter="L2_RD_MSG_IN" title="L2 Cache Requests" name="Read requests" description="The number of L2 cache read requests from internal masters." units="requests" />
+        <event offset="145" advanced="yes" counter="L2_RD_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Read stall cycles" description="The number of cycles where L2 cache read requests from internal masters are stalled." units="cycles" />
+        <event offset="146" advanced="yes" counter="L2_WR_MSG_IN" title="L2 Cache Requests" name="Write requests" description="The number of L2 cache write requests from internal masters." units="requests" />
+        <event offset="147" advanced="yes" counter="L2_WR_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Write stall cycles" description="The number of cycles where L2 cache write requests from internal masters are stalled." units="cycles" />
+        <event offset="148" advanced="yes" counter="L2_SNP_MSG_IN" title="L2 Cache Requests" name="Snoop requests" description="The number of L2 snoop requests from internal masters." units="requests" />
+        <event offset="149" advanced="yes" counter="L2_SNP_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Snoop stall cycles" description="The number of cycles where L2 cache snoop requests from internal masters are stalled." units="cycles" />
+        <event offset="150" advanced="yes" counter="L2_RD_MSG_OUT" title="L2 Cache Requests" name="L1 read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal master." units="requests" />
+        <event offset="151" advanced="yes" counter="L2_RD_MSG_OUT_STALL" title="L2 Cache Stall Cycles" name="L1 read stall cycles" description="The number of cycles where L1 cache read requests sent by the L2 cache to an internal master are stalled." units="cycles" />
+        <event offset="152" advanced="yes" counter="L2_WR_MSG_OUT" title="L2 Cache Requests" name="L1 write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal master." units="requests" />
+        <event offset="153" counter="L2_ANY_LOOKUP" title="L2 Cache Lookups" name="Any lookup" description="The number of L2 cache lookups performed." units="requests" />
+        <event offset="154" counter="L2_READ_LOOKUP" title="L2 Cache Lookups" name="Read lookup" description="The number of L2 cache read lookups performed." units="requests" />
+        <event offset="155" counter="L2_WRITE_LOOKUP" title="L2 Cache Lookups" name="Write lookup" description="The number of L2 cache write lookups performed." units="requests" />
+        <event offset="156" advanced="yes" counter="L2_EXT_SNOOP_LOOKUP" title="L2 Cache Lookups" name="External snoop lookups" description="The number of coherency snoop lookups performed that were triggered by an external master." units="requests" />
+        <event offset="157" counter="L2_EXT_READ" title="External Bus Accesses" name="Read transaction" description="The number of external read transactions." units="transactions" />
+        <event offset="158" advanced="yes" counter="L2_EXT_READ_NOSNP" title="External Bus Accesses" name="ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" />
+        <event offset="159" advanced="yes" counter="L2_EXT_READ_UNIQUE" title="External Bus Accesses" name="ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" />
+        <event offset="160" counter="L2_EXT_READ_BEATS" title="External Bus Beats" name="Read beat" description="The number of external bus data read cycles." units="beats" />
+        <event offset="161" counter="L2_EXT_AR_STALL" title="External Bus Stalls" name="Read stall cycles" description="The number of cycles where a read is stalled waiting for the external bus." units="cycles" />
+        <event offset="162" counter="L2_EXT_AR_CNT_Q1" title="External Bus Outstanding Reads" name="0-25% outstanding" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" />
+        <event offset="163" counter="L2_EXT_AR_CNT_Q2" title="External Bus Outstanding Reads" name="25-50% outstanding" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" />
+        <event offset="164" counter="L2_EXT_AR_CNT_Q3" title="External Bus Outstanding Reads" name="50-75% outstanding" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" />
+        <event offset="165" counter="L2_EXT_RRESP_0_127" title="External Bus Read Latency" name="0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" />
+        <event offset="166" counter="L2_EXT_RRESP_128_191" title="External Bus Read Latency" name="128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" />
+        <event offset="167" counter="L2_EXT_RRESP_192_255" title="External Bus Read Latency" name="192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" />
+        <event offset="168" counter="L2_EXT_RRESP_256_319" title="External Bus Read Latency" name="256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" />
+        <event offset="169" counter="L2_EXT_RRESP_320_383" title="External Bus Read Latency" name="320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" />
+        <event offset="170" counter="L2_EXT_WRITE" title="External Bus Accesses" name="Write transaction" description="The number of external write transactions." units="transactions" />
+        <event offset="171" advanced="yes" counter="L2_EXT_WRITE_NOSNP_FULL" title="External Bus Accesses" name="WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" />
+        <event offset="172" advanced="yes" counter="L2_EXT_WRITE_NOSNP_PTL" title="External Bus Accesses" name="WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" />
+        <event offset="173" advanced="yes" counter="L2_EXT_WRITE_SNP_FULL" title="External Bus Accesses" name="WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" />
+        <event offset="174" advanced="yes" counter="L2_EXT_WRITE_SNP_PTL" title="External Bus Accesses" name="WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" />
+        <event offset="175" counter="L2_EXT_WRITE_BEATS" title="External Bus Beats" name="Write beat" description="The number of external bus data write cycles." units="beats" />
+        <event offset="176" counter="L2_EXT_W_STALL" title="External Bus Stalls" name="Write stall cycles" description="The number of cycles where a write is stalled waiting for the external bus." units="cycles" />
+        <event offset="177" counter="L2_EXT_AW_CNT_Q1" title="External Bus Outstanding Writes" name="0-25% outstanding" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" />
+        <event offset="178" counter="L2_EXT_AW_CNT_Q2" title="External Bus Outstanding Writes" name="25-50% outstanding" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" />
+        <event offset="179" counter="L2_EXT_AW_CNT_Q3" title="External Bus Outstanding Writes" name="50-75% outstanding" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" />
+        <event offset="180" advanced="yes" counter="L2_EXT_SNOOP" title="External Bus Accesses" name="Snoop transactions" description="The number of coherency snoops triggered by external masters." units="transactions" />
+        <event offset="181" advanced="yes" counter="L2_EXT_SNOOP_STALL" title="External Bus Stalls" name="Snoop stall cycles" description="The number of cycles where a coherency snoop triggered by an external master is stalled." units="cycles" />
+    </category>
+</metrics>
\ No newline at end of file
diff --git a/src/panfrost/perf/G57.xml b/src/panfrost/perf/G57.xml
new file mode 100644 (file)
index 0000000..20f9869
--- /dev/null
@@ -0,0 +1,179 @@
+<!--
+Copyright © 2017-2020 ARM Limited.
+Copyright © 2021 Collabora, Ltd.
+Author: Antonio Caggiano <antonio.caggiano@collabora.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+<metrics id="TNAx">
+    <category name="Job Manager" per_cpu="no"> <!-- GPU-level cycle counters plus job, task, active and wait-cycle counters for the fragment (JS0), non-fragment (JS1) and reserved (JS2) queues. -->
+        <event offset="6" counter="GPU_ACTIVE" title="GPU Cycles" name="GPU active" description="The number of cycles where the GPU has a workload of any type queued for processing." units="cycles" />
+        <event offset="7" advanced="yes" counter="IRQ_ACTIVE" title="GPU Cycles" name="Interrupt active" description="The number of cycles where the GPU has a pending interrupt." units="cycles" />
+        <event offset="8" advanced="yes" counter="JS0_JOBS" title="GPU Jobs" name="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs" />
+        <event offset="9" counter="JS0_TASKS" title="GPU Tasks" name="Fragment tasks" description="The number of 32x32 pixel tasks processed by the GPU fragment queue." units="tasks" />
+        <event offset="10" counter="JS0_ACTIVE" title="GPU Cycles" name="Fragment queue active" description="The number of cycles where work is queued for processing in the GPU fragment queue." units="cycles" />
+        <event offset="12" advanced="yes" counter="JS0_WAIT_READ" title="GPU Wait Cycles" name="Fragment descriptor reads cycles" description="The number of cycles where queued fragment work is waiting for a descriptor load." units="cycles" />
+        <event offset="13" advanced="yes" counter="JS0_WAIT_ISSUE" title="GPU Wait Cycles" name="Fragment job issue cycles" description="The number of cycles where queued fragment work is waiting for an available processor." units="cycles" />
+        <event offset="14" advanced="yes" counter="JS0_WAIT_DEPEND" title="GPU Wait Cycles" name="Fragment job dependency cycles" description="The number of cycles where queued fragment work is waiting for dependent work to complete." units="cycles" />
+        <event offset="15" advanced="yes" counter="JS0_WAIT_FINISH" title="GPU Wait Cycles" name="Fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued fragment work to complete." units="cycles" />
+        <event offset="16" advanced="yes" counter="JS1_JOBS" title="GPU Jobs" name="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs" />
+        <event offset="17" advanced="yes" counter="JS1_TASKS" title="GPU Tasks" name="Non-fragment tasks" description="The number of tasks processed by the GPU non-fragment queue." units="tasks" />
+        <event offset="18" counter="JS1_ACTIVE" title="GPU Cycles" name="Non-fragment queue active" description="The number of cycles where work is queued in the GPU non-fragment queue." units="cycles" />
+        <event offset="20" advanced="yes" counter="JS1_WAIT_READ" title="GPU Wait Cycles" name="Non-fragment descriptor read cycles" description="The number of cycles where queued non-fragment work is waiting for a descriptor load." units="cycles" />
+        <event offset="21" advanced="yes" counter="JS1_WAIT_ISSUE" title="GPU Wait Cycles" name="Non-fragment job issue cycles" description="The number of cycles where queued non-fragment work is waiting for an available processor." units="cycles" />
+        <event offset="22" advanced="yes" counter="JS1_WAIT_DEPEND" title="GPU Wait Cycles" name="Non-fragment job dependency cycles" description="The number of cycles where queued non-fragment work is waiting for dependent work to complete." units="cycles" />
+        <event offset="23" advanced="yes" counter="JS1_WAIT_FINISH" title="GPU Wait Cycles" name="Non-fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued non-fragment work to complete." units="cycles" />
+        <event offset="24" advanced="yes" counter="JS2_JOBS" title="GPU Jobs" name="Reserved jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs" />
+        <event offset="25" advanced="yes" counter="JS2_TASKS" title="GPU Tasks" name="Reserved tasks" description="The number of tasks processed by the GPU reserved queue." units="tasks" />
+        <event offset="26" advanced="yes" counter="JS2_ACTIVE" title="GPU Cycles" name="Reserved queue active" description="The number of cycles where work is queued in the GPU reserved queue." units="cycles" />
+        <event offset="28" advanced="yes" counter="JS2_WAIT_READ" title="GPU Wait Cycles" name="Reserved descriptor read cycles" description="The number of cycles where queued reserved work is waiting for a descriptor load." units="cycles" />
+        <event offset="29" advanced="yes" counter="JS2_WAIT_ISSUE" title="GPU Wait Cycles" name="Reserved job issue cycles" description="The number of cycles where queued reserved work is waiting for an available processor." units="cycles" />
+        <event offset="30" advanced="yes" counter="JS2_WAIT_DEPEND" title="GPU Wait Cycles" name="Reserved job dependency cycles" description="The number of cycles where queued reserved work is waiting for dependent work to complete." units="cycles" />
+        <event offset="31" advanced="yes" counter="JS2_WAIT_FINISH" title="GPU Wait Cycles" name="Reserved job finish cycles" description="The number of cycles where the GPU is waiting for issued reserved work to complete." units="cycles" />
+    </category>
+    <category name="Memory System" per_cpu="no"> <!-- MMU lookup, L2 cache request/stall/lookup, and external bus access, beat, stall, latency and outstanding-transaction counters. NOTE(review): offsets 196-245 here are also used by the "Shader Core" category in this file; confirm that offsets are resolved per category rather than globally. -->
+        <event offset="196" advanced="yes" counter="MMU_REQUESTS" title="MMU Stage 1 Translations" name="MMU lookups" description="The number of main MMU address translations performed." units="requests" />
+        <event offset="208" advanced="yes" counter="L2_RD_MSG_IN" title="L2 Cache Requests" name="Read requests" description="The number of L2 cache read requests from internal masters." units="requests" />
+        <event offset="209" advanced="yes" counter="L2_RD_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Read stall cycles" description="The number of cycles L2 cache read requests from internal masters are stalled." units="cycles" />
+        <event offset="210" advanced="yes" counter="L2_WR_MSG_IN" title="L2 Cache Requests" name="Write requests" description="The number of L2 cache write requests from internal masters." units="requests" />
+        <event offset="211" advanced="yes" counter="L2_WR_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Write stall cycles" description="The number of cycles where L2 cache write requests from internal masters are stalled." units="cycles" />
+        <event offset="212" advanced="yes" counter="L2_SNP_MSG_IN" title="L2 Cache Requests" name="Snoop requests" description="The number of L2 snoop requests from internal masters." units="requests" />
+        <event offset="213" advanced="yes" counter="L2_SNP_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Snoop stall cycles" description="The number of cycles where L2 cache snoop requests from internal masters are stalled." units="cycles" />
+        <event offset="214" advanced="yes" counter="L2_RD_MSG_OUT" title="L2 Cache Requests" name="L1 read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal master." units="requests" />
+        <event offset="215" advanced="yes" counter="L2_RD_MSG_OUT_STALL" title="L2 Cache Stall Cycles" name="L1 read stall cycles" description="The number of cycles where L1 cache read requests sent by the L2 cache to an internal master are stalled." units="cycles" />
+        <event offset="216" advanced="yes" counter="L2_WR_MSG_OUT" title="L2 Cache Requests" name="L1 write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal master." units="requests" />
+        <event offset="217" counter="L2_ANY_LOOKUP" title="L2 Cache Lookups" name="Any lookup" description="The number of L2 cache lookups performed." units="requests" />
+        <event offset="218" counter="L2_READ_LOOKUP" title="L2 Cache Lookups" name="Read lookup" description="The number of L2 cache read lookups performed." units="requests" />
+        <event offset="219" counter="L2_WRITE_LOOKUP" title="L2 Cache Lookups" name="Write lookup" description="The number of L2 cache write lookups performed." units="requests" />
+        <event offset="220" advanced="yes" counter="L2_EXT_SNOOP_LOOKUP" title="L2 Cache Lookups" name="External snoop lookups" description="The number of coherency snoop lookups performed that were triggered by an external master." units="requests" />
+        <event offset="221" counter="L2_EXT_READ" title="External Bus Accesses" name="Read transaction" description="The number of external read transactions." units="transactions" />
+        <event offset="222" advanced="yes" counter="L2_EXT_READ_NOSNP" title="External Bus Accesses" name="ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" />
+        <event offset="223" advanced="yes" counter="L2_EXT_READ_UNIQUE" title="External Bus Accesses" name="ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" />
+        <event offset="224" counter="L2_EXT_READ_BEATS" title="External Bus Beats" name="Read beat" description="The number of external bus data read cycles." units="beats" />
+        <event offset="225" counter="L2_EXT_AR_STALL" title="External Bus Stalls" name="Read stall cycles" description="The number of cycles where a read is stalled waiting for the external bus." units="cycles" />
+        <event offset="226" counter="L2_EXT_AR_CNT_Q1" title="External Bus Outstanding Reads" name="0-25% outstanding" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" />
+        <event offset="227" counter="L2_EXT_AR_CNT_Q2" title="External Bus Outstanding Reads" name="25-50% outstanding" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" />
+        <event offset="228" counter="L2_EXT_AR_CNT_Q3" title="External Bus Outstanding Reads" name="50-75% outstanding" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" />
+        <event offset="229" counter="L2_EXT_RRESP_0_127" title="External Bus Read Latency" name="0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" />
+        <event offset="230" counter="L2_EXT_RRESP_128_191" title="External Bus Read Latency" name="128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" />
+        <event offset="231" counter="L2_EXT_RRESP_192_255" title="External Bus Read Latency" name="192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" />
+        <event offset="232" counter="L2_EXT_RRESP_256_319" title="External Bus Read Latency" name="256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" />
+        <event offset="233" counter="L2_EXT_RRESP_320_383" title="External Bus Read Latency" name="320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" />
+        <event offset="234" counter="L2_EXT_WRITE" title="External Bus Accesses" name="Write transaction" description="The number of external write transactions." units="transactions" />
+        <event offset="235" advanced="yes" counter="L2_EXT_WRITE_NOSNP_FULL" title="External Bus Accesses" name="WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" />
+        <event offset="236" advanced="yes" counter="L2_EXT_WRITE_NOSNP_PTL" title="External Bus Accesses" name="WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" />
+        <event offset="237" advanced="yes" counter="L2_EXT_WRITE_SNP_FULL" title="External Bus Accesses" name="WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" />
+        <event offset="238" advanced="yes" counter="L2_EXT_WRITE_SNP_PTL" title="External Bus Accesses" name="WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" />
+        <event offset="239" counter="L2_EXT_WRITE_BEATS" title="External Bus Beats" name="Write beat" description="The number of external bus data write cycles." units="beats" />
+        <event offset="240" counter="L2_EXT_W_STALL" title="External Bus Stalls" name="Write stall cycles" description="The number of cycles where a write is stalled waiting for the external bus." units="cycles" />
+        <event offset="241" counter="L2_EXT_AW_CNT_Q1" title="External Bus Outstanding Writes" name="0-25% outstanding" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" />
+        <event offset="242" counter="L2_EXT_AW_CNT_Q2" title="External Bus Outstanding Writes" name="25-50% outstanding" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" />
+        <event offset="243" counter="L2_EXT_AW_CNT_Q3" title="External Bus Outstanding Writes" name="50-75% outstanding" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" />
+        <event offset="244" advanced="yes" counter="L2_EXT_SNOOP" title="External Bus Accesses" name="Snoop transactions" description="The number of coherency snoops triggered by external masters." units="transactions" />
+        <event offset="245" advanced="yes" counter="L2_EXT_SNOOP_STALL" title="External Bus Stalls" name="Snoop stall cycles" description="The number of cycles where a coherency snoop triggered by external master is stalled." units="cycles" />
+    </category>
+    <category name="Shader Core" per_cpu="no"> <!-- Shader core counters: fragment and non-fragment activity, warps, quads and tiles, execution core instructions, texture unit, load/store, varying and attribute units, and L2 read and write beats. NOTE(review): offsets 196-245 here are also used by the "Memory System" category in this file; confirm that offsets are resolved per category rather than globally. -->
+        <event offset="196" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment active" description="The number of cycles where the shader core is processing a fragment workload." units="cycles" />
+        <event offset="197" advanced="yes" counter="FRAG_PRIMITIVES_OUT" title="Core Primitives" name="Read primitives" description="The number of primitives read from the tile list by the fragment front-end." units="primitives" />
+        <event offset="198" counter="FRAG_PRIM_RAST" title="Core Primitives" name="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" />
+        <event offset="199" counter="FRAG_FPK_ACTIVE" title="Core Cycles" name="Fragment FPKB active" description="The number of cycles where at least one quad is present in the pre-pipe quad queue." units="cycles" />
+        <event offset="201" counter="FRAG_WARPS" title="Core Warps" name="Fragment warps" description="The number of fragment warps created." units="warps" />
+        <event offset="202" counter="FRAG_PARTIAL_QUADS_RAST" title="Core Quads" name="Partial rasterized quads" description="The number of partially-rasterized fragment quads created." units="quads" />
+        <event offset="203" counter="FRAG_QUADS_RAST" title="Core Quads" name="Rasterized quads" description="The number of quads generated by the rasterization phase." units="quads" />
+        <event offset="204" counter="FRAG_QUADS_EZS_TEST" title="Core Quads" name="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" />
+        <event offset="205" counter="FRAG_QUADS_EZS_UPDATE" title="Core Quads" name="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" />
+        <event offset="206" counter="FRAG_QUADS_EZS_KILL" title="Core Quads" name="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" />
+        <event offset="207" counter="FRAG_LZS_TEST" title="Core Quads" name="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" />
+        <event offset="208" counter="FRAG_LZS_KILL" title="Core Quads" name="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" />
+        <event offset="209" counter="WARP_REG_SIZE_64" title="Core Warps" name="All register warps" description="The number of warps that require more than 32 registers." units="warps" />
+        <event offset="210" counter="FRAG_PTILES" title="Core Tiles" name="Tiles" description="The number of tiles processed by the shader core." units="tiles" />
+        <event offset="211" counter="FRAG_TRANS_ELIM" title="Core Tiles" name="Constant tiles killed" description="The number of tiles killed by transaction elimination." units="tiles" />
+        <event offset="212" counter="QUAD_FPK_KILLER" title="Core Quads" name="FPK occluder quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" />
+        <event offset="213" counter="FULL_QUAD_WARPS" title="Core Warps" name="Full quad warps" description="The number of warps that are fully populated with quads." units="warps" />
+        <event offset="214" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Non-fragment active" description="The number of cycles where the shader core is processing some non-fragment workload." units="cycles" />
+        <event offset="215" advanced="yes" counter="COMPUTE_TASKS" title="Core Tasks" name="Non-fragment tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks" />
+        <event offset="216" counter="COMPUTE_WARPS" title="Core Warps" name="Non-fragment warps" description="The number of non-fragment warps created." units="warps" />
+        <event offset="217" advanced="yes" counter="COMPUTE_STARVING" title="Core Starvation Cycles" name="Non-fragment starvation cycles" description="The number of cycles where the shader core is processing a non-fragment workload and there are no new threads available for execution." units="cycles" />
+        <event offset="218" counter="EXEC_CORE_ACTIVE" title="Core Cycles" name="Execution core active" description="The number of cycles where the shader core is processing at least one warp." units="cycles" />
+        <event offset="219" counter="EXEC_INSTR_FMA" title="Core PU Instructions" name="FMA instructions" description="The number of instructions issued to the FMA pipe." units="instructions" />
+        <event offset="220" counter="EXEC_INSTR_CVT" title="Core PU Instructions" name="CVT instructions" description="The number of instructions issued to the CVT pipe." units="instructions" />
+        <event offset="221" counter="EXEC_INSTR_SFU" title="Core PU Instructions" name="SFU instructions" description="The number of instructions issued to the SFU pipe." units="instructions" />
+        <event offset="222" counter="EXEC_INSTR_MSG" title="Core PU Instructions" name="Message instructions" description="The number of instructions issued to the MSG pipe." units="instructions" />
+        <event offset="223" counter="EXEC_INSTR_DIVERGED" title="Core EE Instructions" name="Diverged instructions" description="The number of instructions executed per warp, that have control flow divergence." units="instructions" />
+        <event offset="224" advanced="yes" counter="EXEC_ICACHE_MISS" title="Core PU Instructions" name="Instruction cache misses" description="The number of instruction cache misses." units="requests" />
+        <event offset="225" advanced="yes" counter="EXEC_STARVE_ARITH" title="Core Starvation Cycles" name="Execution engine starvation cycles" description="The number of cycles where the processing unit is starved of work." units="cycles" />
+        <event offset="226" counter="CALL_BLEND_SHADER" title="Core PU Instructions" name="Blend shader calls" description="The number of blend shader invocations executed." units="instructions" />
+        <event offset="227" counter="TEX_MSGI_NUM_FLITS" title="Texture Bus" name="Input beats" description="The number of texture request message data beats." units="beats" />
+        <event offset="228" counter="TEX_DFCH_CLK_STALLED" title="Core Texture Stalls" name="Descriptor stall cycles" description="The number of cycles where a quad is stalled on texture descriptor fetch." units="cycles" />
+        <event offset="229" counter="TEX_TFCH_CLK_STALLED" title="Core Texture Stalls" name="Fetch queue stall cycles" description="The number of cycles where a quad is stalled on entering texture fetch because the fetch queue is full." units="cycles" />
+        <event offset="230" counter="TEX_TFCH_STARVED_PENDING_DATA_FETCH" title="Core Texture Stalls" name="Filtering unit stall cycles" description="The number of cycles where the filtering unit is idle and there is at least one quad present in the texture data fetch queue." units="cycles" />
+        <event offset="231" counter="TEX_FILT_NUM_OPERATIONS" title="Core Texture Cycles" name="Texturing active" description="The number of texture filtering issue cycles." units="cycles" />
+        <event offset="232" counter="TEX_FILT_NUM_FXR_OPERATIONS" title="Core Texture Cycles" name="4x bilinear filtering active" description="The number of cycles where the filtering unit uses the 4x path to implement nearest or bilinear filtering." units="cycles" />
+        <event offset="233" counter="TEX_FILT_NUM_FST_OPERATIONS" title="Core Texture Cycles" name="2x trilinear filtering active" description="The number of cycles where the filtering unit uses the 4x path to implement trilinear filtering." units="cycles" />
+        <event offset="234" counter="TEX_MSGO_NUM_MSG" title="Core Texture Quads" name="Texture requests" description="The number of quad-width texture operations processed by the texture unit." units="quads" />
+        <event offset="235" counter="TEX_MSGO_NUM_FLITS" title="Texture Bus" name="Output beats" description="The number of texture response message data beats." units="beats" />
+        <event offset="236" counter="LS_MEM_READ_FULL" title="Core Load/Store Cycles" name="Full read cycles" description="The number of full-width load/store cache reads." units="cycles" />
+        <event offset="237" counter="LS_MEM_READ_SHORT" title="Core Load/Store Cycles" name="Partial read cycles" description="The number of partial-width load/store cache reads." units="cycles" />
+        <event offset="238" counter="LS_MEM_WRITE_FULL" title="Core Load/Store Cycles" name="Full write cycles" description="The number of full-width load/store cache writes." units="cycles" />
+        <event offset="239" counter="LS_MEM_WRITE_SHORT" title="Core Load/Store Cycles" name="Partial write cycles" description="The number of partial-width load/store cache writes." units="cycles" />
+        <event offset="240" counter="LS_MEM_ATOMIC" title="Core Load/Store Cycles" name="Atomic access cycles" description="The number of load/store atomic accesses." units="cycles" />
+        <event offset="241" counter="VARY_INSTR" title="Core Varying Requests" name="Interpolation requests" description="The number of warp-width interpolation operations processed by the varying unit." units="instructions" />
+        <event offset="242" counter="VARY_SLOT_32" title="Core Varying Cycles" name="32-bit interpolation active" description="The number of 32-bit interpolation cycles processed by the varying unit." units="cycles" />
+        <event offset="243" counter="VARY_SLOT_16" title="Core Varying Cycles" name="16-bit interpolation active" description="The number of 16-bit interpolation cycles processed by the varying unit." units="cycles" />
+        <event offset="244" advanced="yes" counter="ATTR_INSTR" title="Core Attribute Requests" name="Attribute requests" description="The number of instructions executed by the attribute unit." units="instructions" />
+        <event offset="245" advanced="yes" counter="ARITH_INSTR_FP_MUL" title="Core EE Instructions" name="Multiplier instructions" description="The number of instructions where the workload uses floating-point multiplier hardware." units="instructions" />
+        <event offset="246" counter="BEATS_RD_FTC" title="Core L2 Reads" name="Fragment L2 read beats" description="The number of read beats received by the fixed-function fragment front-end." units="beats" />
+        <event offset="247" counter="BEATS_RD_FTC_EXT" title="Core L2 Reads" name="Fragment external read beats" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="248" counter="BEATS_RD_LSC" title="Core L2 Reads" name="Load/store L2 read beats" description="The number of read beats received by the load/store unit." units="beats" />
+        <event offset="249" counter="BEATS_RD_LSC_EXT" title="Core L2 Reads" name="Load/store external read beats" description="The number of read beats received by the load/store unit that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="250" counter="BEATS_RD_TEX" title="Core L2 Reads" name="Texture L2 read beats" description="The number of read beats received by the texture unit." units="beats" />
+        <event offset="251" counter="BEATS_RD_TEX_EXT" title="Core L2 Reads" name="Texture external read beats" description="The number of read beats received by the texture unit that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="252" advanced="yes" counter="BEATS_RD_OTHER" title="Core L2 Reads" name="Other L2 read beats" description="The number of read beats received by a unit that is not specifically identified." units="beats" />
+        <event offset="253" counter="BEATS_WR_LSC_OTHER" title="Core Writes" name="Load/store other write beats" description="The number of write beats by the load/store unit that are due to any reason other than writeback." units="beats" />
+        <event offset="254" counter="BEATS_WR_TIB" title="Core Writes" name="Tile buffer write beats" description="The number of write beats sent by the tile buffer writeback unit." units="beats" />
+        <event offset="255" counter="BEATS_WR_LSC_WB" title="Core Writes" name="Load/store writeback write beats" description="The number of write beats by the load/store unit that are due to writeback." units="beats" />
+    </category>
+    <category name="Tiler" per_cpu="no"> <!-- Tiler counters: tiler activity, input primitive types, visibility and culling results, L2 read/write beats, IDVS position and varying shading requests and stalls, vertex cache hits/misses, and write buffer stalls. -->
+        <event offset="4" counter="TILER_ACTIVE" title="GPU Cycles" name="Tiler active" description="The number of cycles where the tiler has a workload queued for processing." units="cycles" />
+        <event offset="6" counter="TRIANGLES" title="Input Primitives" name="Triangle primitives" description="The number of input triangle primitives." units="primitives" />
+        <event offset="7" counter="LINES" title="Input Primitives" name="Line primitives" description="The number of input line primitives." units="primitives" />
+        <event offset="8" counter="POINTS" title="Input Primitives" name="Point primitives" description="The number of input point primitives." units="primitives" />
+        <event offset="9" counter="FRONT_FACING" title="Visible Primitives" name="Front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" />
+        <event offset="10" counter="BACK_FACING" title="Visible Primitives" name="Back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" />
+        <event offset="11" counter="PRIM_VISIBLE" title="Primitive Culling" name="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" />
+        <event offset="12" counter="PRIM_CULLED" title="Primitive Culling" name="Facing and XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives" />
+        <event offset="13" counter="PRIM_CLIPPED" title="Primitive Culling" name="Z plane test culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives" />
+        <event offset="14" counter="PRIM_SAT_CULLED" title="Primitive Culling" name="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" />
+        <event offset="17" advanced="yes" counter="BUS_READ" title="Tiler L2 Accesses" name="Read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" />
+        <event offset="19" advanced="yes" counter="BUS_WRITE" title="Tiler L2 Accesses" name="Write beats" description="The number of internal bus data write cycles made by the tiler." units="beats" />
+        <event offset="21" counter="IDVS_POS_SHAD_REQ" title="Tiler Shading Requests" name="Position shading requests" description="The number of position shading requests in the IDVS flow." units="requests" />
+        <event offset="23" advanced="yes" counter="IDVS_POS_SHAD_STALL" title="Tiler Cycles" name="Position shading stall cycles" description="The number of cycles where the tiler has a stalled position shading request." units="cycles" />
+        <event offset="24" advanced="yes" counter="IDVS_POS_FIFO_FULL" title="Tiler Cycles" name="Position FIFO full cycles" description="The number of cycles where the tiler has a stalled position shading buffer." units="cycles" />
+        <event offset="26" advanced="yes" counter="VCACHE_HIT" title="Tiler Vertex Cache" name="Position cache hits" description="The number of position lookups that result in a hit in the vertex cache." units="requests" />
+        <event offset="27" advanced="yes" counter="VCACHE_MISS" title="Tiler Vertex Cache" name="Position cache misses" description="The number of position lookups that miss in the vertex cache." units="requests" />
+        <event offset="31" advanced="yes" counter="VFETCH_STALL" title="Tiler Cycles" name="Primitive assembly busy stall cycles" description="The number of cycles where the tiler is stalled waiting for primitive assembly." units="cycles" />
+        <event offset="34" advanced="yes" counter="IDVS_VBU_HIT" title="Tiler Vertex Cache" name="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" />
+        <event offset="35" advanced="yes" counter="IDVS_VBU_MISS" title="Tiler Vertex Cache" name="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests" />
+        <event offset="37" counter="IDVS_VAR_SHAD_REQ" title="Tiler Shading Requests" name="Varying shading requests" description="The number of varying shading requests in the IDVS flow." units="requests" />
+        <event offset="38" advanced="yes" counter="IDVS_VAR_SHAD_STALL" title="Tiler Cycles" name="Varying shading stall cycles" description="The number of cycles where the tiler has a stalled varying shading request." units="cycles" />
+        <event offset="54" advanced="yes" counter="WRBUF_NO_AXI_ID_STALL" title="Tiler Cycles" name="Write buffer transaction stall cycles" description="The number of cycles where the tiler write buffer can not send data because it has no available write IDs." units="cycles" />
+        <event offset="55" advanced="yes" counter="WRBUF_AXI_STALL" title="Tiler Cycles" name="Write buffer write stall cycles" description="The number of cycles where the tiler write buffer can not send data because the bus is not ready." units="cycles" />
+    </category>
+</metrics>
\ No newline at end of file
diff --git a/src/panfrost/perf/G68.xml b/src/panfrost/perf/G68.xml
new file mode 100644 (file)
index 0000000..9095d9a
--- /dev/null
@@ -0,0 +1,179 @@
+<!--
+Copyright © 2017-2020 ARM Limited.
+Copyright © 2021 Collabora, Ltd.
+Author: Antonio Caggiano <antonio.caggiano@collabora.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+<metrics id="TOTx">
+    <category name="Job Manager" per_cpu="no">
+        <event offset="6" counter="GPU_ACTIVE" title="GPU Cycles" name="GPU active" description="The number of cycles where the GPU has a workload of any type queued for processing." units="cycles" />
+        <event offset="7" advanced="yes" counter="IRQ_ACTIVE" title="GPU Cycles" name="Interrupt active" description="The number of cycles where the GPU has a pending interrupt." units="cycles" />
+        <event offset="8" advanced="yes" counter="JS0_JOBS" title="GPU Jobs" name="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs" />
+        <event offset="9" counter="JS0_TASKS" title="GPU Tasks" name="Fragment tasks" description="The number of 32x32 pixel tasks processed by the GPU fragment queue." units="tasks" />
+        <event offset="10" counter="JS0_ACTIVE" title="GPU Cycles" name="Fragment queue active" description="The number of cycles where work is queued for processing in the GPU fragment queue." units="cycles" />
+        <event offset="12" advanced="yes" counter="JS0_WAIT_READ" title="GPU Wait Cycles" name="Fragment descriptor read cycles" description="The number of cycles where queued fragment work is waiting for a descriptor load." units="cycles" />
+        <event offset="13" advanced="yes" counter="JS0_WAIT_ISSUE" title="GPU Wait Cycles" name="Fragment job issue cycles" description="The number of cycles where queued fragment work is waiting for an available processor." units="cycles" />
+        <event offset="14" advanced="yes" counter="JS0_WAIT_DEPEND" title="GPU Wait Cycles" name="Fragment job dependency cycles" description="The number of cycles where queued fragment work is waiting for dependent work to complete." units="cycles" />
+        <event offset="15" advanced="yes" counter="JS0_WAIT_FINISH" title="GPU Wait Cycles" name="Fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued fragment work to complete." units="cycles" />
+        <event offset="16" advanced="yes" counter="JS1_JOBS" title="GPU Jobs" name="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs" />
+        <event offset="17" advanced="yes" counter="JS1_TASKS" title="GPU Tasks" name="Non-fragment tasks" description="The number of tasks processed by the GPU non-fragment queue." units="tasks" />
+        <event offset="18" counter="JS1_ACTIVE" title="GPU Cycles" name="Non-fragment queue active" description="The number of cycles where work is queued in the GPU non-fragment queue." units="cycles" />
+        <event offset="20" advanced="yes" counter="JS1_WAIT_READ" title="GPU Wait Cycles" name="Non-fragment descriptor read cycles" description="The number of cycles where queued non-fragment work is waiting for a descriptor load." units="cycles" />
+        <event offset="21" advanced="yes" counter="JS1_WAIT_ISSUE" title="GPU Wait Cycles" name="Non-fragment job issue cycles" description="The number of cycles where queued non-fragment work is waiting for an available processor." units="cycles" />
+        <event offset="22" advanced="yes" counter="JS1_WAIT_DEPEND" title="GPU Wait Cycles" name="Non-fragment job dependency cycles" description="The number of cycles where queued non-fragment work is waiting for dependent work to complete." units="cycles" />
+        <event offset="23" advanced="yes" counter="JS1_WAIT_FINISH" title="GPU Wait Cycles" name="Non-fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued non-fragment work to complete." units="cycles" />
+        <event offset="24" advanced="yes" counter="JS2_JOBS" title="GPU Jobs" name="Reserved jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs" />
+        <event offset="25" advanced="yes" counter="JS2_TASKS" title="GPU Tasks" name="Reserved tasks" description="The number of tasks processed by the GPU reserved queue." units="tasks" />
+        <event offset="26" advanced="yes" counter="JS2_ACTIVE" title="GPU Cycles" name="Reserved queue active" description="The number of cycles where work is queued in the GPU reserved queue." units="cycles" />
+        <event offset="28" advanced="yes" counter="JS2_WAIT_READ" title="GPU Wait Cycles" name="Reserved descriptor read cycles" description="The number of cycles where queued reserved work is waiting for a descriptor load." units="cycles" />
+        <event offset="29" advanced="yes" counter="JS2_WAIT_ISSUE" title="GPU Wait Cycles" name="Reserved job issue cycles" description="The number of cycles where queued reserved work is waiting for an available processor." units="cycles" />
+        <event offset="30" advanced="yes" counter="JS2_WAIT_DEPEND" title="GPU Wait Cycles" name="Reserved job dependency cycles" description="The number of cycles where queued reserved work is waiting for dependent work to complete." units="cycles" />
+        <event offset="31" advanced="yes" counter="JS2_WAIT_FINISH" title="GPU Wait Cycles" name="Reserved job finish cycles" description="The number of cycles where the GPU is waiting for issued reserved work to complete." units="cycles" />
+    </category>
+    <category name="Memory System" per_cpu="no">
+        <event offset="196" advanced="yes" counter="MMU_REQUESTS" title="MMU Stage 1 Translations" name="MMU lookups" description="The number of main MMU address translations performed." units="requests" />
+        <event offset="208" advanced="yes" counter="L2_RD_MSG_IN" title="L2 Cache Requests" name="Read requests" description="The number of L2 cache read requests from internal masters." units="requests" />
+        <event offset="209" advanced="yes" counter="L2_RD_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Read stall cycles" description="The number of cycles where L2 cache read requests from internal masters are stalled." units="cycles" />
+        <event offset="210" advanced="yes" counter="L2_WR_MSG_IN" title="L2 Cache Requests" name="Write requests" description="The number of L2 cache write requests from internal masters." units="requests" />
+        <event offset="211" advanced="yes" counter="L2_WR_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Write stall cycles" description="The number of cycles where L2 cache write requests from internal masters are stalled." units="cycles" />
+        <event offset="212" advanced="yes" counter="L2_SNP_MSG_IN" title="L2 Cache Requests" name="Snoop requests" description="The number of L2 snoop requests from internal masters." units="requests" />
+        <event offset="213" advanced="yes" counter="L2_SNP_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Snoop stall cycles" description="The number of cycles where L2 cache snoop requests from internal masters are stalled." units="cycles" />
+        <event offset="214" advanced="yes" counter="L2_RD_MSG_OUT" title="L2 Cache Requests" name="L1 read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal master." units="requests" />
+        <event offset="215" advanced="yes" counter="L2_RD_MSG_OUT_STALL" title="L2 Cache Stall Cycles" name="L1 read stall cycles" description="The number of cycles where L1 cache read requests sent by the L2 cache to an internal master are stalled." units="cycles" />
+        <event offset="216" advanced="yes" counter="L2_WR_MSG_OUT" title="L2 Cache Requests" name="L1 write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal master." units="requests" />
+        <event offset="217" counter="L2_ANY_LOOKUP" title="L2 Cache Lookups" name="Any lookup" description="The number of L2 cache lookups performed." units="requests" />
+        <event offset="218" counter="L2_READ_LOOKUP" title="L2 Cache Lookups" name="Read lookup" description="The number of L2 cache read lookups performed." units="requests" />
+        <event offset="219" counter="L2_WRITE_LOOKUP" title="L2 Cache Lookups" name="Write lookup" description="The number of L2 cache write lookups performed." units="requests" />
+        <event offset="220" advanced="yes" counter="L2_EXT_SNOOP_LOOKUP" title="L2 Cache Lookups" name="External snoop lookups" description="The number of coherency snoop lookups performed that were triggered by an external master." units="requests" />
+        <event offset="221" counter="L2_EXT_READ" title="External Bus Accesses" name="Read transaction" description="The number of external read transactions." units="transactions" />
+        <event offset="222" advanced="yes" counter="L2_EXT_READ_NOSNP" title="External Bus Accesses" name="ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" />
+        <event offset="223" advanced="yes" counter="L2_EXT_READ_UNIQUE" title="External Bus Accesses" name="ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" />
+        <event offset="224" counter="L2_EXT_READ_BEATS" title="External Bus Beats" name="Read beat" description="The number of external bus data read cycles." units="beats" />
+        <event offset="225" counter="L2_EXT_AR_STALL" title="External Bus Stalls" name="Read stall cycles" description="The number of cycles where a read is stalled waiting for the external bus." units="cycles" />
+        <event offset="226" counter="L2_EXT_AR_CNT_Q1" title="External Bus Outstanding Reads" name="0-25% outstanding" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" />
+        <event offset="227" counter="L2_EXT_AR_CNT_Q2" title="External Bus Outstanding Reads" name="25-50% outstanding" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" />
+        <event offset="228" counter="L2_EXT_AR_CNT_Q3" title="External Bus Outstanding Reads" name="50-75% outstanding" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" />
+        <event offset="229" counter="L2_EXT_RRESP_0_127" title="External Bus Read Latency" name="0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" />
+        <event offset="230" counter="L2_EXT_RRESP_128_191" title="External Bus Read Latency" name="128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" />
+        <event offset="231" counter="L2_EXT_RRESP_192_255" title="External Bus Read Latency" name="192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" />
+        <event offset="232" counter="L2_EXT_RRESP_256_319" title="External Bus Read Latency" name="256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" />
+        <event offset="233" counter="L2_EXT_RRESP_320_383" title="External Bus Read Latency" name="320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" />
+        <event offset="234" counter="L2_EXT_WRITE" title="External Bus Accesses" name="Write transaction" description="The number of external write transactions." units="transactions" />
+        <event offset="235" advanced="yes" counter="L2_EXT_WRITE_NOSNP_FULL" title="External Bus Accesses" name="WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" />
+        <event offset="236" advanced="yes" counter="L2_EXT_WRITE_NOSNP_PTL" title="External Bus Accesses" name="WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" />
+        <event offset="237" advanced="yes" counter="L2_EXT_WRITE_SNP_FULL" title="External Bus Accesses" name="WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" />
+        <event offset="238" advanced="yes" counter="L2_EXT_WRITE_SNP_PTL" title="External Bus Accesses" name="WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" />
+        <event offset="239" counter="L2_EXT_WRITE_BEATS" title="External Bus Beats" name="Write beat" description="The number of external bus data write cycles." units="beats" />
+        <event offset="240" counter="L2_EXT_W_STALL" title="External Bus Stalls" name="Write stall cycles" description="The number of cycles where a write is stalled waiting for the external bus." units="cycles" />
+        <event offset="241" counter="L2_EXT_AW_CNT_Q1" title="External Bus Outstanding Writes" name="0-25% outstanding" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" />
+        <event offset="242" counter="L2_EXT_AW_CNT_Q2" title="External Bus Outstanding Writes" name="25-50% outstanding" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" />
+        <event offset="243" counter="L2_EXT_AW_CNT_Q3" title="External Bus Outstanding Writes" name="50-75% outstanding" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" />
+        <event offset="244" advanced="yes" counter="L2_EXT_SNOOP" title="External Bus Accesses" name="Snoop transactions" description="The number of coherency snoops triggered by external masters." units="transactions" />
+        <event offset="245" advanced="yes" counter="L2_EXT_SNOOP_STALL" title="External Bus Stalls" name="Snoop stall cycles" description="The number of cycles where a coherency snoop triggered by an external master is stalled." units="cycles" />
+    </category>
+    <category name="Shader Core" per_cpu="no">
+        <event offset="196" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment active" description="The number of cycles where the shader core is processing a fragment workload." units="cycles" />
+        <event offset="197" advanced="yes" counter="FRAG_PRIMITIVES_OUT" title="Core Primitives" name="Read primitives" description="The number of primitives read from the tile list by the fragment front-end." units="primitives" />
+        <event offset="198" counter="FRAG_PRIM_RAST" title="Core Primitives" name="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" />
+        <event offset="199" counter="FRAG_FPK_ACTIVE" title="Core Cycles" name="Fragment FPKB active" description="The number of cycles where at least one quad is present in the pre-pipe quad queue." units="cycles" />
+        <event offset="201" counter="FRAG_WARPS" title="Core Warps" name="Fragment warps" description="The number of fragment warps created." units="warps" />
+        <event offset="202" counter="FRAG_PARTIAL_QUADS_RAST" title="Core Quads" name="Partial rasterized quads" description="The number of partially-rasterized fragment quads created." units="quads" />
+        <event offset="203" counter="FRAG_QUADS_RAST" title="Core Quads" name="Rasterized quads" description="The number of quads generated by the rasterization phase." units="quads" />
+        <event offset="204" counter="FRAG_QUADS_EZS_TEST" title="Core Quads" name="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" />
+        <event offset="205" counter="FRAG_QUADS_EZS_UPDATE" title="Core Quads" name="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" />
+        <event offset="206" counter="FRAG_QUADS_EZS_KILL" title="Core Quads" name="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" />
+        <event offset="207" counter="FRAG_LZS_TEST" title="Core Quads" name="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" />
+        <event offset="208" counter="FRAG_LZS_KILL" title="Core Quads" name="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" />
+        <event offset="209" counter="WARP_REG_SIZE_64" title="Core Warps" name="All register warps" description="The number of warps that require more than 32 registers." units="warps" />
+        <event offset="210" counter="FRAG_PTILES" title="Core Tiles" name="Tiles" description="The number of tiles processed by the shader core." units="tiles" />
+        <event offset="211" counter="FRAG_TRANS_ELIM" title="Core Tiles" name="Constant tiles killed" description="The number of tiles killed by transaction elimination." units="tiles" />
+        <event offset="212" counter="QUAD_FPK_KILLER" title="Core Quads" name="FPK occluder quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" />
+        <event offset="213" counter="FULL_QUAD_WARPS" title="Core Warps" name="Full quad warps" description="The number of warps that are fully populated with quads." units="warps" />
+        <event offset="214" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Non-fragment active" description="The number of cycles where the shader core is processing some non-fragment workload." units="cycles" />
+        <event offset="215" advanced="yes" counter="COMPUTE_TASKS" title="Core Tasks" name="Non-fragment tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks" />
+        <event offset="216" counter="COMPUTE_WARPS" title="Core Warps" name="Non-fragment warps" description="The number of non-fragment warps created." units="warps" />
+        <event offset="217" advanced="yes" counter="COMPUTE_STARVING" title="Core Starvation Cycles" name="Non-fragment starvation cycles" description="The number of cycles where the shader core is processing a non-fragment workload and there are no new threads available for execution." units="cycles" />
+        <event offset="218" counter="EXEC_CORE_ACTIVE" title="Core Cycles" name="Execution core active" description="The number of cycles where the shader core is processing at least one warp." units="cycles" />
+        <event offset="219" counter="EXEC_INSTR_FMA" title="Core PU Instructions" name="FMA instructions" description="The number of instructions issued to the FMA pipe." units="instructions" />
+        <event offset="220" counter="EXEC_INSTR_CVT" title="Core PU Instructions" name="CVT instructions" description="The number of instructions issued to the CVT pipe." units="instructions" />
+        <event offset="221" counter="EXEC_INSTR_SFU" title="Core PU Instructions" name="SFU instructions" description="The number of instructions issued to the SFU pipe." units="instructions" />
+        <event offset="222" counter="EXEC_INSTR_MSG" title="Core PU Instructions" name="Message instructions" description="The number of instructions issued to the MSG pipe." units="instructions" />
+        <event offset="223" counter="EXEC_INSTR_DIVERGED" title="Core EE Instructions" name="Diverged instructions" description="The number of instructions executed per warp, that have control flow divergence." units="instructions" />
+        <event offset="224" advanced="yes" counter="EXEC_ICACHE_MISS" title="Core PU Instructions" name="Instruction cache misses" description="The number of instruction cache misses." units="requests" />
+        <event offset="225" advanced="yes" counter="EXEC_STARVE_ARITH" title="Core Starvation Cycles" name="Execution engine starvation cycles" description="The number of cycles where the processing unit is starved of work." units="cycles" />
+        <event offset="226" counter="CALL_BLEND_SHADER" title="Core PU Instructions" name="Blend shader calls" description="The number of blend shader invocations executed." units="instructions" />
+        <event offset="227" counter="TEX_MSGI_NUM_FLITS" title="Texture Bus" name="Input beats" description="The number of texture request message data beats." units="beats" />
+        <event offset="228" counter="TEX_DFCH_CLK_STALLED" title="Core Texture Stalls" name="Descriptor stall cycles" description="The number of cycles where a quad is stalled on texture descriptor fetch." units="cycles" />
+        <event offset="229" counter="TEX_TFCH_CLK_STALLED" title="Core Texture Stalls" name="Fetch queue stall cycles" description="The number of cycles where a quad is stalled on entering texture fetch because the fetch queue is full." units="cycles" />
+        <event offset="230" counter="TEX_TFCH_STARVED_PENDING_DATA_FETCH" title="Core Texture Stalls" name="Filtering unit stall cycles" description="The number of cycles where the filtering unit is idle and there is at least one quad present in the texture data fetch queue." units="cycles" />
+        <event offset="231" counter="TEX_FILT_NUM_OPERATIONS" title="Core Texture Cycles" name="Texturing active" description="The number of texture filtering issue cycles." units="cycles" />
+        <event offset="232" counter="TEX_FILT_NUM_FXR_OPERATIONS" title="Core Texture Cycles" name="4x bilinear filtering active" description="The number of cycles where the filtering unit uses the 4x path to implement nearest or bilinear filtering." units="cycles" />
+        <event offset="233" counter="TEX_FILT_NUM_FST_OPERATIONS" title="Core Texture Cycles" name="2x trilinear filtering active" description="The number of cycles where the filtering unit uses the 4x path to implement trilinear filtering." units="cycles" />
+        <event offset="234" counter="TEX_MSGO_NUM_MSG" title="Core Texture Quads" name="Texture requests" description="The number of quad-width texture operations processed by the texture unit." units="quads" />
+        <event offset="235" counter="TEX_MSGO_NUM_FLITS" title="Texture Bus" name="Output beats" description="The number of texture response message data beats." units="beats" />
+        <event offset="236" counter="LS_MEM_READ_FULL" title="Core Load/Store Cycles" name="Full read cycles" description="The number of full-width load/store cache reads." units="cycles" />
+        <event offset="237" counter="LS_MEM_READ_SHORT" title="Core Load/Store Cycles" name="Partial read cycles" description="The number of partial-width load/store cache reads." units="cycles" />
+        <event offset="238" counter="LS_MEM_WRITE_FULL" title="Core Load/Store Cycles" name="Full write cycles" description="The number of full-width load/store cache writes." units="cycles" />
+        <event offset="239" counter="LS_MEM_WRITE_SHORT" title="Core Load/Store Cycles" name="Partial write cycles" description="The number of partial-width load/store cache writes." units="cycles" />
+        <event offset="240" counter="LS_MEM_ATOMIC" title="Core Load/Store Cycles" name="Atomic access cycles" description="The number of load/store atomic accesses." units="cycles" />
+        <event offset="241" counter="VARY_INSTR" title="Core Varying Requests" name="Interpolation requests" description="The number of warp-width interpolation operations processed by the varying unit." units="instructions" />
+        <event offset="242" counter="VARY_SLOT_32" title="Core Varying Cycles" name="32-bit interpolation active" description="The number of 32-bit interpolation cycles processed by the varying unit." units="cycles" />
+        <event offset="243" counter="VARY_SLOT_16" title="Core Varying Cycles" name="16-bit interpolation active" description="The number of 16-bit interpolation cycles processed by the varying unit." units="cycles" />
+        <event offset="244" advanced="yes" counter="ATTR_INSTR" title="Core Attribute Requests" name="Attribute requests" description="The number of instructions executed by the attribute unit." units="instructions" />
+        <event offset="245" counter="SHADER_CORE_ACTIVE" title="Core Cycles" name="Any active" description="The number of cycles where the shader core is processing either a non-fragment workload or a fragment workload." units="cycles" />
+        <event offset="246" counter="BEATS_RD_FTC" title="Core L2 Reads" name="Fragment L2 read beats" description="The number of read beats received by the fixed-function fragment front-end." units="beats" />
+        <event offset="247" counter="BEATS_RD_FTC_EXT" title="Core L2 Reads" name="Fragment external read beats" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="248" counter="BEATS_RD_LSC" title="Core L2 Reads" name="Load/store L2 read beats" description="The number of read beats received by the load/store unit." units="beats" />
+        <event offset="249" counter="BEATS_RD_LSC_EXT" title="Core L2 Reads" name="Load/store external read beats" description="The number of read beats received by the load/store unit that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="250" counter="BEATS_RD_TEX" title="Core L2 Reads" name="Texture L2 read beats" description="The number of read beats received by the texture unit." units="beats" />
+        <event offset="251" counter="BEATS_RD_TEX_EXT" title="Core L2 Reads" name="Texture external read beats" description="The number of read beats received by the texture unit that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="252" advanced="yes" counter="BEATS_RD_OTHER" title="Core L2 Reads" name="Other L2 read beats" description="The number of read beats received by a unit that is not specifically identified." units="beats" />
+        <event offset="253" counter="BEATS_WR_LSC_OTHER" title="Core Writes" name="Load/store other write beats" description="The number of write beats by the load/store unit that are due to any reason other than writeback." units="beats" />
+        <event offset="254" counter="BEATS_WR_TIB" title="Core Writes" name="Tile buffer write beats" description="The number of write beats sent by the tile buffer writeback unit." units="beats" />
+        <event offset="255" counter="BEATS_WR_LSC_WB" title="Core Writes" name="Load/store writeback write beats" description="The number of write beats by the load/store unit that are due to writeback." units="beats" />
+    </category>
+    <category name="Tiler" per_cpu="no">
+        <event offset="4" counter="TILER_ACTIVE" title="GPU Cycles" name="Tiler active" description="The number of cycles where the tiler has a workload queued for processing." units="cycles" />
+        <event offset="6" counter="TRIANGLES" title="Input Primitives" name="Triangle primitives" description="The number of input triangle primitives." units="primitives" />
+        <event offset="7" counter="LINES" title="Input Primitives" name="Line primitives" description="The number of input line primitives." units="primitives" />
+        <event offset="8" counter="POINTS" title="Input Primitives" name="Point primitives" description="The number of input point primitives." units="primitives" />
+        <event offset="9" counter="FRONT_FACING" title="Visible Primitives" name="Front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" />
+        <event offset="10" counter="BACK_FACING" title="Visible Primitives" name="Back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" />
+        <event offset="11" counter="PRIM_VISIBLE" title="Primitive Culling" name="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" />
+        <event offset="12" counter="PRIM_CULLED" title="Primitive Culling" name="Facing and XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives" />
+        <event offset="13" counter="PRIM_CLIPPED" title="Primitive Culling" name="Z plane test culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives" />
+        <event offset="14" counter="PRIM_SAT_CULLED" title="Primitive Culling" name="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" />
+        <event offset="17" advanced="yes" counter="BUS_READ" title="Tiler L2 Accesses" name="Read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" />
+        <event offset="19" advanced="yes" counter="BUS_WRITE" title="Tiler L2 Accesses" name="Write beats" description="The number of internal bus data write cycles made by the tiler." units="beats" />
+        <event offset="21" counter="IDVS_POS_SHAD_REQ" title="Tiler Shading Requests" name="Position shading requests" description="The number of position shading requests in the IDVS flow." units="requests" />
+        <event offset="23" advanced="yes" counter="IDVS_POS_SHAD_STALL" title="Tiler Cycles" name="Position shading stall cycles" description="The number of cycles where the tiler has a stalled position shading request." units="cycles" />
+        <event offset="24" advanced="yes" counter="IDVS_POS_FIFO_FULL" title="Tiler Cycles" name="Position FIFO full cycles" description="The number of cycles where the tiler has a stalled position shading buffer." units="cycles" />
+        <event offset="26" advanced="yes" counter="VCACHE_HIT" title="Tiler Vertex Cache" name="Position cache hits" description="The number of position lookups that result in a hit in the vertex cache." units="requests" />
+        <event offset="27" advanced="yes" counter="VCACHE_MISS" title="Tiler Vertex Cache" name="Position cache misses" description="The number of position lookups that miss in the vertex cache." units="requests" />
+        <event offset="31" advanced="yes" counter="VFETCH_STALL" title="Tiler Cycles" name="Primitive assembly busy stall cycles" description="The number of cycles where the tiler is stalled waiting for primitive assembly." units="cycles" />
+        <event offset="34" advanced="yes" counter="IDVS_VBU_HIT" title="Tiler Vertex Cache" name="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" />
+        <event offset="35" advanced="yes" counter="IDVS_VBU_MISS" title="Tiler Vertex Cache" name="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests" />
+        <event offset="37" counter="IDVS_VAR_SHAD_REQ" title="Tiler Shading Requests" name="Varying shading requests" description="The number of varying shading requests in the IDVS flow." units="requests" />
+        <event offset="38" advanced="yes" counter="IDVS_VAR_SHAD_STALL" title="Tiler Cycles" name="Varying shading stall cycles" description="The number of cycles where the tiler has a stalled varying shading request." units="cycles" />
+        <event offset="54" advanced="yes" counter="WRBUF_NO_AXI_ID_STALL" title="Tiler Cycles" name="Write buffer transaction stall cycles" description="The number of cycles where the tiler write buffer can not send data because it has no available write IDs." units="cycles" />
+        <event offset="55" advanced="yes" counter="WRBUF_AXI_STALL" title="Tiler Cycles" name="Write buffer write stall cycles" description="The number of cycles where the tiler write buffer can not send data because the bus is not ready." units="cycles" />
+    </category>
+</metrics>
\ No newline at end of file
diff --git a/src/panfrost/perf/G71.xml b/src/panfrost/perf/G71.xml
new file mode 100644 (file)
index 0000000..080fd61
--- /dev/null
@@ -0,0 +1,177 @@
+<!--
+Copyright © 2017-2020 ARM Limited.
+Copyright © 2021 Collabora, Ltd.
+Author: Antonio Caggiano <antonio.caggiano@collabora.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+<metrics id="TMIx">
+    <category name="Job Manager" per_cpu="no">
+        <event offset="6" counter="GPU_ACTIVE" title="GPU Cycles" name="GPU active" description="The number of cycles where the GPU has a workload of any type queued for processing." units="cycles" />
+        <event offset="7" advanced="yes" counter="IRQ_ACTIVE" title="GPU Cycles" name="Interrupt active" description="The number of cycles where the GPU has a pending interrupt." units="cycles" />
+        <event offset="8" advanced="yes" counter="JS0_JOBS" title="GPU Jobs" name="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs" />
+        <event offset="9" counter="JS0_TASKS" title="GPU Tasks" name="Fragment tasks" description="The number of 32x32 pixel tasks processed by the GPU fragment queue." units="tasks" />
+        <event offset="10" counter="JS0_ACTIVE" title="GPU Cycles" name="Fragment queue active" description="The number of cycles where work is queued for processing in the GPU fragment queue." units="cycles" />
+        <event offset="12" advanced="yes" counter="JS0_WAIT_READ" title="GPU Wait Cycles" name="Fragment descriptor read cycles" description="The number of cycles where queued fragment work is waiting for a descriptor load." units="cycles" />
+        <event offset="13" advanced="yes" counter="JS0_WAIT_ISSUE" title="GPU Wait Cycles" name="Fragment job issue cycles" description="The number of cycles where queued fragment work is waiting for an available processor." units="cycles" />
+        <event offset="14" advanced="yes" counter="JS0_WAIT_DEPEND" title="GPU Wait Cycles" name="Fragment job dependency cycles" description="The number of cycles where queued fragment work is waiting for dependent work to complete." units="cycles" />
+        <event offset="15" advanced="yes" counter="JS0_WAIT_FINISH" title="GPU Wait Cycles" name="Fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued fragment work to complete." units="cycles" />
+        <event offset="16" advanced="yes" counter="JS1_JOBS" title="GPU Jobs" name="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs" />
+        <event offset="17" advanced="yes" counter="JS1_TASKS" title="GPU Tasks" name="Non-fragment tasks" description="The number of tasks processed by the GPU non-fragment queue." units="tasks" />
+        <event offset="18" counter="JS1_ACTIVE" title="GPU Cycles" name="Non-fragment queue active" description="The number of cycles where work is queued in the GPU non-fragment queue." units="cycles" />
+        <event offset="20" advanced="yes" counter="JS1_WAIT_READ" title="GPU Wait Cycles" name="Non-fragment descriptor read cycles" description="The number of cycles where queued non-fragment work is waiting for a descriptor load." units="cycles" />
+        <event offset="21" advanced="yes" counter="JS1_WAIT_ISSUE" title="GPU Wait Cycles" name="Non-fragment job issue cycles" description="The number of cycles where queued non-fragment work is waiting for an available processor." units="cycles" />
+        <event offset="22" advanced="yes" counter="JS1_WAIT_DEPEND" title="GPU Wait Cycles" name="Non-fragment job dependency cycles" description="The number of cycles where queued non-fragment work is waiting for dependent work to complete." units="cycles" />
+        <event offset="23" advanced="yes" counter="JS1_WAIT_FINISH" title="GPU Wait Cycles" name="Non-fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued non-fragment work to complete." units="cycles" />
+        <event offset="24" advanced="yes" counter="JS2_JOBS" title="GPU Jobs" name="Reserved jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs" />
+        <event offset="25" advanced="yes" counter="JS2_TASKS" title="GPU Tasks" name="Reserved tasks" description="The number of tasks processed by the GPU reserved queue." units="tasks" />
+        <event offset="26" advanced="yes" counter="JS2_ACTIVE" title="GPU Cycles" name="Reserved queue active" description="The number of cycles where work is queued in the GPU reserved queue." units="cycles" />
+        <event offset="28" advanced="yes" counter="JS2_WAIT_READ" title="GPU Wait Cycles" name="Reserved descriptor read cycles" description="The number of cycles where queued reserved work is waiting for a descriptor load." units="cycles" />
+        <event offset="29" advanced="yes" counter="JS2_WAIT_ISSUE" title="GPU Wait Cycles" name="Reserved job issue cycles" description="The number of cycles where queued reserved work is waiting for an available processor." units="cycles" />
+        <event offset="30" advanced="yes" counter="JS2_WAIT_DEPEND" title="GPU Wait Cycles" name="Reserved job dependency cycles" description="The number of cycles where queued reserved work is waiting for dependent work to complete." units="cycles" />
+        <event offset="31" advanced="yes" counter="JS2_WAIT_FINISH" title="GPU Wait Cycles" name="Reserved job finish cycles" description="The number of cycles where the GPU is waiting for issued reserved work to complete." units="cycles" />
+    </category>
+    <category name="Tiler" per_cpu="no">
+        <event offset="68" counter="TILER_ACTIVE" title="GPU Cycles" name="Tiler active" description="The number of cycles where the tiler has a workload queued for processing." units="cycles" />
+        <event offset="70" counter="TRIANGLES" title="Input Primitives" name="Triangle primitives" description="The number of input triangle primitives." units="primitives" />
+        <event offset="71" counter="LINES" title="Input Primitives" name="Line primitives" description="The number of input line primitives." units="primitives" />
+        <event offset="72" counter="POINTS" title="Input Primitives" name="Point primitives" description="The number of input point primitives." units="primitives" />
+        <event offset="73" counter="FRONT_FACING" title="Visible Primitives" name="Front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" />
+        <event offset="74" counter="BACK_FACING" title="Visible Primitives" name="Back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" />
+        <event offset="75" counter="PRIM_VISIBLE" title="Primitive Culling" name="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" />
+        <event offset="76" counter="PRIM_CULLED" title="Primitive Culling" name="Facing and XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives" />
+        <event offset="77" counter="PRIM_CLIPPED" title="Primitive Culling" name="Z plane test culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives" />
+        <event offset="78" counter="PRIM_SAT_CULLED" title="Primitive Culling" name="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" />
+        <event offset="81" advanced="yes" counter="BUS_READ" title="Tiler L2 Accesses" name="Read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" />
+        <event offset="83" advanced="yes" counter="BUS_WRITE" title="Tiler L2 Accesses" name="Write beats" description="The number of internal bus data write cycles made by the tiler." units="beats" />
+        <event offset="85" counter="IDVS_POS_SHAD_REQ" title="Tiler Shading Requests" name="Position shading requests" description="The number of position shading requests in the IDVS flow." units="requests" />
+        <event offset="87" advanced="yes" counter="IDVS_POS_SHAD_STALL" title="Tiler Cycles" name="Position shading stall cycles" description="The number of cycles where the tiler has a stalled position shading request." units="cycles" />
+        <event offset="88" advanced="yes" counter="IDVS_POS_FIFO_FULL" title="Tiler Cycles" name="Position FIFO full cycles" description="The number of cycles where the tiler has a stalled position shading buffer." units="cycles" />
+        <event offset="90" advanced="yes" counter="VCACHE_HIT" title="Tiler Vertex Cache" name="Position cache hits" description="The number of position lookups that result in a hit in the vertex cache." units="requests" />
+        <event offset="91" advanced="yes" counter="VCACHE_MISS" title="Tiler Vertex Cache" name="Position cache misses" description="The number of position lookups that miss in the vertex cache." units="requests" />
+        <event offset="95" advanced="yes" counter="VFETCH_STALL" title="Tiler Cycles" name="Primitive assembly busy stall cycles" description="The number of cycles where the tiler is stalled waiting for primitive assembly." units="cycles" />
+        <event offset="98" advanced="yes" counter="IDVS_VBU_HIT" title="Tiler Vertex Cache" name="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" />
+        <event offset="99" advanced="yes" counter="IDVS_VBU_MISS" title="Tiler Vertex Cache" name="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests" />
+        <event offset="101" counter="IDVS_VAR_SHAD_REQ" title="Tiler Shading Requests" name="Varying shading requests" description="The number of varying shading requests in the IDVS flow." units="requests" />
+        <event offset="102" advanced="yes" counter="IDVS_VAR_SHAD_STALL" title="Tiler Cycles" name="Varying shading stall cycles" description="The number of cycles where the tiler has a stalled varying shading request." units="cycles" />
+        <event offset="118" advanced="yes" counter="WRBUF_NO_AXI_ID_STALL" title="Tiler Cycles" name="Write buffer transaction stall cycles" description="The number of cycles where the tiler write buffer can not send data because it has no available write IDs." units="cycles" />
+        <event offset="119" advanced="yes" counter="WRBUF_AXI_STALL" title="Tiler Cycles" name="Write buffer write stall cycles" description="The number of cycles where the tiler write buffer can not send data because the bus is not ready." units="cycles" />
+    </category>
+    <category name="Shader Core" per_cpu="no">
+        <event offset="196" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment active" description="The number of cycles where the shader core is processing a fragment workload." units="cycles" />
+        <event offset="197" advanced="yes" counter="FRAG_PRIMITIVES" title="Core Primitives" name="Read primitives" description="The number of primitives read from the tile list by the fragment front-end." units="primitives" />
+        <event offset="198" counter="FRAG_PRIM_RAST" title="Core Primitives" name="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" />
+        <event offset="199" counter="FRAG_FPK_ACTIVE" title="Core Cycles" name="Fragment FPKB active" description="The number of cycles where at least one quad is present in the pre-pipe quad queue." units="cycles" />
+        <event offset="201" counter="FRAG_WARPS" title="Core Warps" name="Fragment warps" description="The number of fragment warps created." units="warps" />
+        <event offset="202" counter="FRAG_PARTIAL_WARPS" title="Core Warps" name="Partial fragment warps" description="The number of fragment warps containing helper threads that do not correspond to a hit sample point." units="warps" />
+        <event offset="203" counter="FRAG_QUADS_RAST" title="Core Quads" name="Rasterized quads" description="The number of quads generated by the rasterization phase." units="quads" />
+        <event offset="204" counter="FRAG_QUADS_EZS_TEST" title="Core Quads" name="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" />
+        <event offset="205" counter="FRAG_QUADS_EZS_UPDATE" title="Core Quads" name="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" />
+        <event offset="206" counter="FRAG_QUADS_EZS_KILL" title="Core Quads" name="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" />
+        <event offset="207" counter="FRAG_LZS_TEST" title="Core Quads" name="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" />
+        <event offset="208" counter="FRAG_LZS_KILL" title="Core Quads" name="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" />
+        <event offset="210" counter="FRAG_PTILES" title="Core Tiles" name="Tiles" description="The number of tiles processed by the shader core." units="tiles" />
+        <event offset="211" counter="FRAG_TRANS_ELIM" title="Core Tiles" name="Constant tiles killed" description="The number of tiles killed by transaction elimination." units="tiles" />
+        <event offset="212" counter="QUAD_FPK_KILLER" title="Core Quads" name="FPK occluder quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" />
+        <event offset="214" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Non-fragment active" description="The number of cycles where the shader core is processing some non-fragment workload." units="cycles" />
+        <event offset="215" advanced="yes" counter="COMPUTE_TASKS" title="Core Tasks" name="Non-fragment tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks" />
+        <event offset="216" counter="COMPUTE_WARPS" title="Core Warps" name="Non-fragment warps" description="The number of non-fragment warps created." units="warps" />
+        <event offset="217" advanced="yes" counter="COMPUTE_STARVING" title="Core Starvation Cycles" name="Non-fragment starvation cycles" description="The number of cycles where the shader core is processing a non-fragment workload and there are no new threads available for execution." units="cycles" />
+        <event offset="218" counter="EXEC_CORE_ACTIVE" title="Core Cycles" name="Execution core active" description="The number of cycles where the shader core is processing at least one warp." units="cycles" />
+        <event offset="219" advanced="yes" counter="EXEC_ACTIVE" title="Core Cycles" name="Execution engine active" description="The number of cycles where the execution engine unit is processing at least one thread." units="cycles" />
+        <event offset="220" counter="EXEC_INSTR_COUNT" title="Core EE Instructions" name="Executed instructions" description="The number of instructions executed per warp." units="instructions" />
+        <event offset="221" counter="EXEC_INSTR_DIVERGED" title="Core EE Instructions" name="Diverged instructions" description="The number of instructions executed per warp, that have control flow divergence." units="instructions" />
+        <event offset="222" advanced="yes" counter="EXEC_INSTR_STARVING" title="Core Starvation Cycles" name="Execution engine starvation cycles" description="The number of cycles where no new threads are available for execution." units="cycles" />
+        <event offset="223" advanced="yes" counter="ARITH_INSTR_SINGLE_FMA" title="Core EE Instructions" name="Arithmetic instructions" description="The number of instructions where the workload is a single FMA pipe arithmetic operation." units="instructions" />
+        <event offset="224" advanced="yes" counter="ARITH_INSTR_DOUBLE" title="Core EE Instructions" name="Dual Arithmetic instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe arithmetic operation." units="instructions" />
+        <event offset="225" advanced="yes" counter="ARITH_INSTR_MSG" title="Core EE Instructions" name="Arithmetic + Message instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe message operation." units="instructions" />
+        <event offset="226" advanced="yes" counter="ARITH_INSTR_MSG_ONLY" title="Core EE Instructions" name="Message instructions" description="The number of instructions where the workload is a single ADD pipe message operation, with no FMA pipe operation." units="instructions" />
+        <event offset="227" counter="TEX_INSTR" title="Core Texture Requests" name="Texture requests" description="The number of thread-width texture operations processed." units="instructions" />
+        <event offset="228" counter="TEX_INSTR_MIPMAP" title="Core Texture Requests" name="Mipmapped texture requests" description="The number of texture operations that act on a mipmapped texture." units="instructions" />
+        <event offset="229" counter="TEX_INSTR_COMPRESSED" title="Core Texture Requests" name="Compressed texture requests" description="The number of texture operations acting on a compressed texture." units="instructions" />
+        <event offset="230" counter="TEX_INSTR_3D" title="Core Texture Requests" name="3D texture requests" description="The number of texture operations acting on a 3D texture." units="instructions" />
+        <event offset="231" counter="TEX_INSTR_TRILINEAR" title="Core Texture Requests" name="Trilinear filtered requests" description="The number of texture operations using a trilinear texture filter." units="instructions" />
+        <event offset="232" counter="TEX_COORD_ISSUE" title="Core Texture Cycles" name="Texturing active" description="The number of texture filtering issue cycles." units="cycles" />
+        <event offset="233" advanced="yes" counter="TEX_COORD_STALL" title="Core Texture Cycles" name="Coordinate stall cycles" description="The number of clock cycles where threads are stalled at the texel coordinate calculation stage." units="cycles" />
+        <event offset="234" advanced="yes" counter="TEX_STARVE_CACHE" title="Core Texture Cycles" name="Line fill stall cycles" description="The number of clock cycles where at least one thread is waiting for data from the texture cache, but no lookup is completed." units="cycles" />
+        <event offset="235" advanced="yes" counter="TEX_STARVE_FILTER" title="Core Texture Cycles" name="Partial data stall cycles" description="The number of clock cycles where at least one thread fetched some data from the texture cache, but no filtering operation is started." units="cycles" />
+        <event offset="236" counter="LS_MEM_READ_FULL" title="Core Load/Store Cycles" name="Full read cycles" description="The number of full-width load/store cache reads." units="cycles" />
+        <event offset="237" counter="LS_MEM_READ_SHORT" title="Core Load/Store Cycles" name="Partial read cycles" description="The number of partial-width load/store cache reads." units="cycles" />
+        <event offset="238" counter="LS_MEM_WRITE_FULL" title="Core Load/Store Cycles" name="Full write cycles" description="The number of full-width load/store cache writes." units="cycles" />
+        <event offset="239" counter="LS_MEM_WRITE_SHORT" title="Core Load/Store Cycles" name="Partial write cycles" description="The number of partial-width load/store cache writes." units="cycles" />
+        <event offset="240" counter="LS_MEM_ATOMIC" title="Core Load/Store Cycles" name="Atomic access cycles" description="The number of load/store atomic accesses." units="cycles" />
+        <event offset="241" counter="VARY_INSTR" title="Core Varying Requests" name="Interpolation requests" description="The number of warp-width interpolation operations processed by the varying unit." units="instructions" />
+        <event offset="242" counter="VARY_SLOT_32" title="Core Varying Cycles" name="32-bit interpolation active" description="The number of 32-bit interpolation cycles processed by the varying unit." units="cycles" />
+        <event offset="243" counter="VARY_SLOT_16" title="Core Varying Cycles" name="16-bit interpolation active" description="The number of 16-bit interpolation cycles processed by the varying unit." units="cycles" />
+        <event offset="244" advanced="yes" counter="ATTR_INSTR" title="Core Attribute Requests" name="Attribute requests" description="The number of instructions executed by the attribute unit." units="instructions" />
+        <event offset="245" advanced="yes" counter="ARITH_INSTR_FP_MUL" title="Core EE Instructions" name="Multiplier instructions" description="The number of instructions where the workload uses floating-point multiplier hardware." units="instructions" />
+        <event offset="246" counter="BEATS_RD_FTC" title="Core L2 Reads" name="Fragment L2 read beats" description="The number of read beats received by the fixed-function fragment front-end." units="beats" />
+        <event offset="247" counter="BEATS_RD_FTC_EXT" title="Core L2 Reads" name="Fragment external read beats" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="248" counter="BEATS_RD_LSC" title="Core L2 Reads" name="Load/store L2 read beats" description="The number of read beats received by the load/store unit." units="beats" />
+        <event offset="249" counter="BEATS_RD_LSC_EXT" title="Core L2 Reads" name="Load/store external read beats" description="The number of read beats received by the load/store unit that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="250" counter="BEATS_RD_TEX" title="Core L2 Reads" name="Texture L2 read beats" description="The number of read beats received by the texture unit." units="beats" />
+        <event offset="251" counter="BEATS_RD_TEX_EXT" title="Core L2 Reads" name="Texture external read beats" description="The number of read beats received by the texture unit that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="252" advanced="yes" counter="BEATS_RD_OTHER" title="Core L2 Reads" name="Other L2 read beats" description="The number of read beats received by a unit that is not specifically identified." units="beats" />
+        <event offset="253" counter="BEATS_WR_LSC" title="Core Writes" name="Load/store write beats" description="The number of write beats sent by the load/store unit." units="beats" />
+        <event offset="254" counter="BEATS_WR_TIB" title="Core Writes" name="Tile buffer write beats" description="The number of write beats sent by the tile buffer writeback unit." units="beats" />
+        <event offset="255" advanced="yes" counter="BEATS_WR_OTHER" title="Core Writes" name="Other write beats" description="The number of write beats sent by any unit that is not specifically identified." units="beats" />
+    </category>
+    <category name="Memory System" per_cpu="no">
+        <event offset="132" advanced="yes" counter="MMU_REQUESTS" title="MMU Stage 1 Translations" name="MMU lookups" description="The number of main MMU address translations performed." units="requests" />
+        <event offset="144" advanced="yes" counter="L2_RD_MSG_IN" title="L2 Cache Requests" name="Read requests" description="The number of L2 cache read requests from internal masters." units="requests" />
+        <event offset="145" advanced="yes" counter="L2_RD_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Read stall cycles" description="The number of cycles where L2 cache read requests from internal masters are stalled." units="cycles" />
+        <event offset="146" advanced="yes" counter="L2_WR_MSG_IN" title="L2 Cache Requests" name="Write requests" description="The number of L2 cache write requests from internal masters." units="requests" />
+        <event offset="147" advanced="yes" counter="L2_WR_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Write stall cycles" description="The number of cycles where L2 cache write requests from internal masters are stalled." units="cycles" />
+        <event offset="148" advanced="yes" counter="L2_SNP_MSG_IN" title="L2 Cache Requests" name="Snoop requests" description="The number of L2 snoop requests from internal masters." units="requests" />
+        <event offset="149" advanced="yes" counter="L2_SNP_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Snoop stall cycles" description="The number of cycles where L2 cache snoop requests from internal masters are stalled." units="cycles" />
+        <event offset="150" advanced="yes" counter="L2_RD_MSG_OUT" title="L2 Cache Requests" name="L1 read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal master." units="requests" />
+        <event offset="151" advanced="yes" counter="L2_RD_MSG_OUT_STALL" title="L2 Cache Stall Cycles" name="L1 read stall cycles" description="The number of cycles where L1 cache read requests sent by the L2 cache to an internal master are stalled." units="cycles" />
+        <event offset="152" advanced="yes" counter="L2_WR_MSG_OUT" title="L2 Cache Requests" name="L1 write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal master." units="requests" />
+        <event offset="153" counter="L2_ANY_LOOKUP" title="L2 Cache Lookups" name="Any lookup" description="The number of L2 cache lookups performed." units="requests" />
+        <event offset="154" counter="L2_READ_LOOKUP" title="L2 Cache Lookups" name="Read lookup" description="The number of L2 cache read lookups performed." units="requests" />
+        <event offset="155" counter="L2_WRITE_LOOKUP" title="L2 Cache Lookups" name="Write lookup" description="The number of L2 cache write lookups performed." units="requests" />
+        <event offset="156" advanced="yes" counter="L2_EXT_SNOOP_LOOKUP" title="L2 Cache Lookups" name="External snoop lookups" description="The number of coherency snoop lookups performed that were triggered by an external master." units="requests" />
+        <event offset="157" counter="L2_EXT_READ" title="External Bus Accesses" name="Read transaction" description="The number of external read transactions." units="transactions" />
+        <event offset="158" advanced="yes" counter="L2_EXT_READ_NOSNP" title="External Bus Accesses" name="ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" />
+        <event offset="159" advanced="yes" counter="L2_EXT_READ_UNIQUE" title="External Bus Accesses" name="ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" />
+        <event offset="160" counter="L2_EXT_READ_BEATS" title="External Bus Beats" name="Read beat" description="The number of external bus data read cycles." units="beats" />
+        <event offset="161" counter="L2_EXT_AR_STALL" title="External Bus Stalls" name="Read stall cycles" description="The number of cycles where a read is stalled waiting for the external bus." units="cycles" />
+        <event offset="162" counter="L2_EXT_AR_CNT_Q1" title="External Bus Outstanding Reads" name="0-25% outstanding" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" />
+        <event offset="163" counter="L2_EXT_AR_CNT_Q2" title="External Bus Outstanding Reads" name="25-50% outstanding" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" />
+        <event offset="164" counter="L2_EXT_AR_CNT_Q3" title="External Bus Outstanding Reads" name="50-75% outstanding" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" />
+        <event offset="165" counter="L2_EXT_RRESP_0_127" title="External Bus Read Latency" name="0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" />
+        <event offset="166" counter="L2_EXT_RRESP_128_191" title="External Bus Read Latency" name="128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" />
+        <event offset="167" counter="L2_EXT_RRESP_192_255" title="External Bus Read Latency" name="192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" />
+        <event offset="168" counter="L2_EXT_RRESP_256_319" title="External Bus Read Latency" name="256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" />
+        <event offset="169" counter="L2_EXT_RRESP_320_383" title="External Bus Read Latency" name="320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" />
+        <event offset="170" counter="L2_EXT_WRITE" title="External Bus Accesses" name="Write transaction" description="The number of external write transactions." units="transactions" />
+        <event offset="171" advanced="yes" counter="L2_EXT_WRITE_NOSNP_FULL" title="External Bus Accesses" name="WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" />
+        <event offset="172" advanced="yes" counter="L2_EXT_WRITE_NOSNP_PTL" title="External Bus Accesses" name="WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" />
+        <event offset="173" advanced="yes" counter="L2_EXT_WRITE_SNP_FULL" title="External Bus Accesses" name="WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" />
+        <event offset="174" advanced="yes" counter="L2_EXT_WRITE_SNP_PTL" title="External Bus Accesses" name="WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" />
+        <event offset="175" counter="L2_EXT_WRITE_BEATS" title="External Bus Beats" name="Write beat" description="The number of external bus data write cycles." units="beats" />
+        <event offset="176" counter="L2_EXT_W_STALL" title="External Bus Stalls" name="Write stall cycles" description="The number of cycles where a write is stalled waiting for the external bus." units="cycles" />
+        <event offset="177" counter="L2_EXT_AW_CNT_Q1" title="External Bus Outstanding Writes" name="0-25% outstanding" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" />
+        <event offset="178" counter="L2_EXT_AW_CNT_Q2" title="External Bus Outstanding Writes" name="25-50% outstanding" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" />
+        <event offset="179" counter="L2_EXT_AW_CNT_Q3" title="External Bus Outstanding Writes" name="50-75% outstanding" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" />
+        <event offset="180" advanced="yes" counter="L2_EXT_SNOOP" title="External Bus Accesses" name="Snoop transactions" description="The number of coherency snoops triggered by external masters." units="transactions" />
+        <event offset="181" advanced="yes" counter="L2_EXT_SNOOP_STALL" title="External Bus Stalls" name="Snoop stall cycles" description="The number of cycles where a coherency snoop triggered by an external master is stalled." units="cycles" />
+    </category>
+</metrics>
\ No newline at end of file
diff --git a/src/panfrost/perf/G72.xml b/src/panfrost/perf/G72.xml
new file mode 100644 (file)
index 0000000..80b4201
--- /dev/null
@@ -0,0 +1,177 @@
+<!--
+Copyright © 2017-2020 ARM Limited.
+Copyright © 2021 Collabora, Ltd.
+Author: Antonio Caggiano <antonio.caggiano@collabora.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+<metrics id="THEx">
+    <category name="Job Manager" per_cpu="no">
+        <event offset="6" counter="GPU_ACTIVE" title="GPU Cycles" name="GPU active" description="The number of cycles where the GPU has a workload of any type queued for processing." units="cycles" />
+        <event offset="7" advanced="yes" counter="IRQ_ACTIVE" title="GPU Cycles" name="Interrupt active" description="The number of cycles where the GPU has a pending interrupt." units="cycles" />
+        <event offset="8" advanced="yes" counter="JS0_JOBS" title="GPU Jobs" name="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs" />
+        <event offset="9" counter="JS0_TASKS" title="GPU Tasks" name="Fragment tasks" description="The number of 32x32 pixel tasks processed by the GPU fragment queue." units="tasks" />
+        <event offset="10" counter="JS0_ACTIVE" title="GPU Cycles" name="Fragment queue active" description="The number of cycles where work is queued for processing in the GPU fragment queue." units="cycles" />
+        <event offset="12" advanced="yes" counter="JS0_WAIT_READ" title="GPU Wait Cycles" name="Fragment descriptor read cycles" description="The number of cycles where queued fragment work is waiting for a descriptor load." units="cycles" />
+        <event offset="13" advanced="yes" counter="JS0_WAIT_ISSUE" title="GPU Wait Cycles" name="Fragment job issue cycles" description="The number of cycles where queued fragment work is waiting for an available processor." units="cycles" />
+        <event offset="14" advanced="yes" counter="JS0_WAIT_DEPEND" title="GPU Wait Cycles" name="Fragment job dependency cycles" description="The number of cycles where queued fragment work is waiting for dependent work to complete." units="cycles" />
+        <event offset="15" advanced="yes" counter="JS0_WAIT_FINISH" title="GPU Wait Cycles" name="Fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued fragment work to complete." units="cycles" />
+        <event offset="16" advanced="yes" counter="JS1_JOBS" title="GPU Jobs" name="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs" />
+        <event offset="17" advanced="yes" counter="JS1_TASKS" title="GPU Tasks" name="Non-fragment tasks" description="The number of tasks processed by the GPU non-fragment queue." units="tasks" />
+        <event offset="18" counter="JS1_ACTIVE" title="GPU Cycles" name="Non-fragment queue active" description="The number of cycles where work is queued in the GPU non-fragment queue." units="cycles" />
+        <event offset="20" advanced="yes" counter="JS1_WAIT_READ" title="GPU Wait Cycles" name="Non-fragment descriptor read cycles" description="The number of cycles where queued non-fragment work is waiting for a descriptor load." units="cycles" />
+        <event offset="21" advanced="yes" counter="JS1_WAIT_ISSUE" title="GPU Wait Cycles" name="Non-fragment job issue cycles" description="The number of cycles where queued non-fragment work is waiting for an available processor." units="cycles" />
+        <event offset="22" advanced="yes" counter="JS1_WAIT_DEPEND" title="GPU Wait Cycles" name="Non-fragment job dependency cycles" description="The number of cycles where queued non-fragment work is waiting for dependent work to complete." units="cycles" />
+        <event offset="23" advanced="yes" counter="JS1_WAIT_FINISH" title="GPU Wait Cycles" name="Non-fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued non-fragment work to complete." units="cycles" />
+        <event offset="24" advanced="yes" counter="JS2_JOBS" title="GPU Jobs" name="Reserved jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs" />
+        <event offset="25" advanced="yes" counter="JS2_TASKS" title="GPU Tasks" name="Reserved tasks" description="The number of tasks processed by the GPU reserved queue." units="tasks" />
+        <event offset="26" advanced="yes" counter="JS2_ACTIVE" title="GPU Cycles" name="Reserved queue active" description="The number of cycles where work is queued in the GPU reserved queue." units="cycles" />
+        <event offset="28" advanced="yes" counter="JS2_WAIT_READ" title="GPU Wait Cycles" name="Reserved descriptor read cycles" description="The number of cycles where queued reserved work is waiting for a descriptor load." units="cycles" />
+        <event offset="29" advanced="yes" counter="JS2_WAIT_ISSUE" title="GPU Wait Cycles" name="Reserved job issue cycles" description="The number of cycles where queued reserved work is waiting for an available processor." units="cycles" />
+        <event offset="30" advanced="yes" counter="JS2_WAIT_DEPEND" title="GPU Wait Cycles" name="Reserved job dependency cycles" description="The number of cycles where queued reserved work is waiting for dependent work to complete." units="cycles" />
+        <event offset="31" advanced="yes" counter="JS2_WAIT_FINISH" title="GPU Wait Cycles" name="Reserved job finish cycles" description="The number of cycles where the GPU is waiting for issued reserved work to complete." units="cycles" />
+    </category>
+    <category name="Tiler" per_cpu="no">
+        <event offset="68" counter="TILER_ACTIVE" title="GPU Cycles" name="Tiler active" description="The number of cycles where the tiler has a workload queued for processing." units="cycles" />
+        <event offset="70" counter="TRIANGLES" title="Input Primitives" name="Triangle primitives" description="The number of input triangle primitives." units="primitives" />
+        <event offset="71" counter="LINES" title="Input Primitives" name="Line primitives" description="The number of input line primitives." units="primitives" />
+        <event offset="72" counter="POINTS" title="Input Primitives" name="Point primitives" description="The number of input point primitives." units="primitives" />
+        <event offset="73" counter="FRONT_FACING" title="Visible Primitives" name="Front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" />
+        <event offset="74" counter="BACK_FACING" title="Visible Primitives" name="Back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" />
+        <event offset="75" counter="PRIM_VISIBLE" title="Primitive Culling" name="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" />
+        <event offset="76" counter="PRIM_CULLED" title="Primitive Culling" name="Facing and XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives" />
+        <event offset="77" counter="PRIM_CLIPPED" title="Primitive Culling" name="Z plane test culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives" />
+        <event offset="78" counter="PRIM_SAT_CULLED" title="Primitive Culling" name="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" />
+        <event offset="81" advanced="yes" counter="BUS_READ" title="Tiler L2 Accesses" name="Read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" />
+        <event offset="83" advanced="yes" counter="BUS_WRITE" title="Tiler L2 Accesses" name="Write beats" description="The number of internal bus data write cycles made by the tiler." units="beats" />
+        <event offset="85" counter="IDVS_POS_SHAD_REQ" title="Tiler Shading Requests" name="Position shading requests" description="The number of position shading requests in the IDVS flow." units="requests" />
+        <event offset="87" advanced="yes" counter="IDVS_POS_SHAD_STALL" title="Tiler Cycles" name="Position shading stall cycles" description="The number of cycles where the tiler has a stalled position shading request." units="cycles" />
+        <event offset="88" advanced="yes" counter="IDVS_POS_FIFO_FULL" title="Tiler Cycles" name="Position FIFO full cycles" description="The number of cycles where the tiler has a stalled position shading buffer." units="cycles" />
+        <event offset="90" advanced="yes" counter="VCACHE_HIT" title="Tiler Vertex Cache" name="Position cache hits" description="The number of position lookups that result in a hit in the vertex cache." units="requests" />
+        <event offset="91" advanced="yes" counter="VCACHE_MISS" title="Tiler Vertex Cache" name="Position cache misses" description="The number of position lookups that miss in the vertex cache." units="requests" />
+        <event offset="95" advanced="yes" counter="VFETCH_STALL" title="Tiler Cycles" name="Primitive assembly busy stall cycles" description="The number of cycles where the tiler is stalled waiting for primitive assembly." units="cycles" />
+        <event offset="98" advanced="yes" counter="IDVS_VBU_HIT" title="Tiler Vertex Cache" name="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" />
+        <event offset="99" advanced="yes" counter="IDVS_VBU_MISS" title="Tiler Vertex Cache" name="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests" />
+        <event offset="101" counter="IDVS_VAR_SHAD_REQ" title="Tiler Shading Requests" name="Varying shading requests" description="The number of varying shading requests in the IDVS flow." units="requests" />
+        <event offset="102" advanced="yes" counter="IDVS_VAR_SHAD_STALL" title="Tiler Cycles" name="Varying shading stall cycles" description="The number of cycles where the tiler has a stalled varying shading request." units="cycles" />
+        <event offset="118" advanced="yes" counter="WRBUF_NO_AXI_ID_STALL" title="Tiler Cycles" name="Write buffer transaction stall cycles" description="The number of cycles where the tiler write buffer can not send data because it has no available write IDs." units="cycles" />
+        <event offset="119" advanced="yes" counter="WRBUF_AXI_STALL" title="Tiler Cycles" name="Write buffer write stall cycles" description="The number of cycles where the tiler write buffer can not send data because the bus is not ready." units="cycles" />
+    </category>
+    <category name="Shader Core" per_cpu="no">
+        <event offset="196" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment active" description="The number of cycles where the shader core is processing a fragment workload." units="cycles" />
+        <event offset="197" advanced="yes" counter="FRAG_PRIMITIVES" title="Core Primitives" name="Read primitives" description="The number of primitives read from the tile list by the fragment front-end." units="primitives" />
+        <event offset="198" counter="FRAG_PRIM_RAST" title="Core Primitives" name="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" />
+        <event offset="199" counter="FRAG_FPK_ACTIVE" title="Core Cycles" name="Fragment FPKB active" description="The number of cycles where at least one quad is present in the pre-pipe quad queue." units="cycles" />
+        <event offset="201" counter="FRAG_WARPS" title="Core Warps" name="Fragment warps" description="The number of fragment warps created." units="warps" />
+        <event offset="202" counter="FRAG_PARTIAL_WARPS" title="Core Warps" name="Partial fragment warps" description="The number of fragment warps containing helper threads that do not correspond to a hit sample point." units="warps" />
+        <event offset="203" counter="FRAG_QUADS_RAST" title="Core Quads" name="Rasterized quads" description="The number of quads generated by the rasterization phase." units="quads" />
+        <event offset="204" counter="FRAG_QUADS_EZS_TEST" title="Core Quads" name="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" />
+        <event offset="205" counter="FRAG_QUADS_EZS_UPDATE" title="Core Quads" name="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" />
+        <event offset="206" counter="FRAG_QUADS_EZS_KILL" title="Core Quads" name="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" />
+        <event offset="207" counter="FRAG_LZS_TEST" title="Core Quads" name="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" />
+        <event offset="208" counter="FRAG_LZS_KILL" title="Core Quads" name="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" />
+        <event offset="210" counter="FRAG_PTILES" title="Core Tiles" name="Tiles" description="The number of tiles processed by the shader core." units="tiles" />
+        <event offset="211" counter="FRAG_TRANS_ELIM" title="Core Tiles" name="Constant tiles killed" description="The number of tiles killed by transaction elimination." units="tiles" />
+        <event offset="212" counter="QUAD_FPK_KILLER" title="Core Quads" name="FPK occluder quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" />
+        <event offset="214" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Non-fragment active" description="The number of cycles where the shader core is processing some non-fragment workload." units="cycles" />
+        <event offset="215" advanced="yes" counter="COMPUTE_TASKS" title="Core Tasks" name="Non-fragment tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks" />
+        <event offset="216" counter="COMPUTE_WARPS" title="Core Warps" name="Non-fragment warps" description="The number of non-fragment warps created." units="warps" />
+        <event offset="217" advanced="yes" counter="COMPUTE_STARVING" title="Core Starvation Cycles" name="Non-fragment starvation cycles" description="The number of cycles where the shader core is processing a non-fragment workload and there are no new threads available for execution." units="cycles" />
+        <event offset="218" counter="EXEC_CORE_ACTIVE" title="Core Cycles" name="Execution core active" description="The number of cycles where the shader core is processing at least one warp." units="cycles" />
+        <event offset="219" advanced="yes" counter="EXEC_ACTIVE" title="Core Cycles" name="Execution engine active" description="The number of cycles where the execution engine unit is processing at least one thread." units="cycles" />
+        <event offset="220" counter="EXEC_INSTR_COUNT" title="Core EE Instructions" name="Executed instructions" description="The number of instructions executed per warp." units="instructions" />
+        <event offset="221" counter="EXEC_INSTR_DIVERGED" title="Core EE Instructions" name="Diverged instructions" description="The number of instructions executed per warp, that have control flow divergence." units="instructions" />
+        <event offset="222" advanced="yes" counter="EXEC_INSTR_STARVING" title="Core Starvation Cycles" name="Execution engine starvation cycles" description="The number of cycles where no new threads are available for execution." units="cycles" />
+        <event offset="223" advanced="yes" counter="ARITH_INSTR_SINGLE_FMA" title="Core EE Instructions" name="Arithmetic instructions" description="The number of instructions where the workload is a single FMA pipe arithmetic operation." units="instructions" />
+        <event offset="224" advanced="yes" counter="ARITH_INSTR_DOUBLE" title="Core EE Instructions" name="Dual Arithmetic instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe arithmetic operation." units="instructions" />
+        <event offset="225" advanced="yes" counter="ARITH_INSTR_MSG" title="Core EE Instructions" name="Arithmetic + Message instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe message operation." units="instructions" />
+        <event offset="226" advanced="yes" counter="ARITH_INSTR_MSG_ONLY" title="Core EE Instructions" name="Message instructions" description="The number of instructions where the workload is a single ADD pipe message operation, with no FMA pipe operation." units="instructions" />
+        <event offset="227" counter="TEX_INSTR" title="Core Texture Requests" name="Texture requests" description="The number of thread-width texture operations processed." units="instructions" />
+        <event offset="228" counter="TEX_INSTR_MIPMAP" title="Core Texture Requests" name="Mipmapped texture request" description="The number of texture operations that act on a mipmapped texture." units="instructions" />
+        <event offset="229" counter="TEX_INSTR_COMPRESSED" title="Core Texture Requests" name="Compressed texture requests" description="The number of texture operations acting on a compressed texture." units="instructions" />
+        <event offset="230" counter="TEX_INSTR_3D" title="Core Texture Requests" name="3D texture requests" description="The number of texture operations acting on a 3D texture." units="instructions" />
+        <event offset="231" counter="TEX_INSTR_TRILINEAR" title="Core Texture Requests" name="Trilinear filtered requests" description="The number of texture operations using a trilinear texture filter." units="instructions" />
+        <event offset="232" counter="TEX_COORD_ISSUE" title="Core Texture Cycles" name="Texturing active" description="The number of texture filtering issue cycles." units="cycles" />
+        <event offset="233" advanced="yes" counter="TEX_COORD_STALL" title="Core Texture Cycles" name="Coordinate stall cycles" description="The number of clock cycles where threads are stalled at the texel coordinate calculation stage." units="cycles" />
+        <event offset="234" advanced="yes" counter="TEX_STARVE_CACHE" title="Core Texture Cycles" name="Line fill stall cycles" description="The number of clock cycles where at least one thread is waiting for data from the texture cache, but no lookup is completed." units="cycles" />
+        <event offset="235" advanced="yes" counter="TEX_STARVE_FILTER" title="Core Texture Cycles" name="Partial data stall cycles" description="The number of clock cycles where at least one thread fetched some data from the texture cache, but no filtering operation is started." units="cycles" />
+        <event offset="236" counter="LS_MEM_READ_FULL" title="Core Load/Store Cycles" name="Full read cycles" description="The number of full-width load/store cache reads." units="cycles" />
+        <event offset="237" counter="LS_MEM_READ_SHORT" title="Core Load/Store Cycles" name="Partial read cycles" description="The number of partial-width load/store cache reads." units="cycles" />
+        <event offset="238" counter="LS_MEM_WRITE_FULL" title="Core Load/Store Cycles" name="Full write cycles" description="The number of full-width load/store cache writes." units="cycles" />
+        <event offset="239" counter="LS_MEM_WRITE_SHORT" title="Core Load/Store Cycles" name="Partial write cycles" description="The number of partial-width load/store cache writes." units="cycles" />
+        <event offset="240" counter="LS_MEM_ATOMIC" title="Core Load/Store Cycles" name="Atomic access cycles" description="The number of load/store atomic accesses." units="cycles" />
+        <event offset="241" counter="VARY_INSTR" title="Core Varying Requests" name="Interpolation requests" description="The number of warp-width interpolation operations processed by the varying unit." units="instructions" />
+        <event offset="242" counter="VARY_SLOT_32" title="Core Varying Cycles" name="32-bit interpolation active" description="The number of 32-bit interpolation cycles processed by the varying unit." units="cycles" />
+        <event offset="243" counter="VARY_SLOT_16" title="Core Varying Cycles" name="16-bit interpolation active" description="The number of 16-bit interpolation cycles processed by the varying unit." units="cycles" />
+        <event offset="244" advanced="yes" counter="ATTR_INSTR" title="Core Attribute Requests" name="Attribute requests" description="The number of instructions executed by the attribute unit." units="instructions" />
+        <event offset="245" advanced="yes" counter="ARITH_INSTR_FP_MUL" title="Core EE Instructions" name="Multiplier instructions" description="The number of instructions where the workload uses floating-point multiplier hardware." units="instructions" />
+        <event offset="246" counter="BEATS_RD_FTC" title="Core L2 Reads" name="Fragment L2 read beats" description="The number of read beats received by the fixed-function fragment front-end." units="beats" />
+        <event offset="247" counter="BEATS_RD_FTC_EXT" title="Core L2 Reads" name="Fragment external read beats" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="248" counter="BEATS_RD_LSC" title="Core L2 Reads" name="Load/store L2 read beats" description="The number of read beats received by the load/store unit." units="beats" />
+        <event offset="249" counter="BEATS_RD_LSC_EXT" title="Core L2 Reads" name="Load/store external read beats" description="The number of read beats received by the load/store unit that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="250" counter="BEATS_RD_TEX" title="Core L2 Reads" name="Texture L2 read beats" description="The number of read beats received by the texture unit." units="beats" />
+        <event offset="251" counter="BEATS_RD_TEX_EXT" title="Core L2 Reads" name="Texture external read beats" description="The number of read beats received by the texture unit that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="252" advanced="yes" counter="BEATS_RD_OTHER" title="Core L2 Reads" name="Other L2 read beats" description="The number of read beats received by a unit that is not specifically identified." units="beats" />
+        <event offset="253" counter="BEATS_WR_LSC" title="Core Writes" name="Load/store write beats" description="The number of write beats sent by the load/store unit." units="beats" />
+        <event offset="254" counter="BEATS_WR_TIB" title="Core Writes" name="Tile buffer write beats" description="The number of write beats sent by the tile buffer writeback unit." units="beats" />
+        <event offset="255" advanced="yes" counter="BEATS_WR_OTHER" title="Core Writes" name="Other write beats" description="The number of write beats sent by any unit that is not specifically identified." units="beats" />
+    </category>
+    <category name="Memory System" per_cpu="no">
+        <event offset="132" advanced="yes" counter="MMU_REQUESTS" title="MMU Stage 1 Translations" name="MMU lookups" description="The number of main MMU address translations performed." units="requests" />
+        <event offset="144" advanced="yes" counter="L2_RD_MSG_IN" title="L2 Cache Requests" name="Read requests" description="The number of L2 cache read requests from internal masters." units="requests" />
+        <event offset="145" advanced="yes" counter="L2_RD_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Read stall cycles" description="The number of cycles where L2 cache read requests from internal masters are stalled." units="cycles" />
+        <event offset="146" advanced="yes" counter="L2_WR_MSG_IN" title="L2 Cache Requests" name="Write requests" description="The number of L2 cache write requests from internal masters." units="requests" />
+        <event offset="147" advanced="yes" counter="L2_WR_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Write stall cycles" description="The number of cycles where L2 cache write requests from internal masters are stalled." units="cycles" />
+        <event offset="148" advanced="yes" counter="L2_SNP_MSG_IN" title="L2 Cache Requests" name="Snoop requests" description="The number of L2 snoop requests from internal masters." units="requests" />
+        <event offset="149" advanced="yes" counter="L2_SNP_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Snoop stall cycles" description="The number of cycles where L2 cache snoop requests from internal masters are stalled." units="cycles" />
+        <event offset="150" advanced="yes" counter="L2_RD_MSG_OUT" title="L2 Cache Requests" name="L1 read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal master." units="requests" />
+        <event offset="151" advanced="yes" counter="L2_RD_MSG_OUT_STALL" title="L2 Cache Stall Cycles" name="L1 read stall cycles" description="The number of cycles where L1 cache read requests sent by the L2 cache to an internal master are stalled." units="cycles" />
+        <event offset="152" advanced="yes" counter="L2_WR_MSG_OUT" title="L2 Cache Requests" name="L1 write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal master." units="requests" />
+        <event offset="153" counter="L2_ANY_LOOKUP" title="L2 Cache Lookups" name="Any lookup" description="The number of L2 cache lookups performed." units="requests" />
+        <event offset="154" counter="L2_READ_LOOKUP" title="L2 Cache Lookups" name="Read lookup" description="The number of L2 cache read lookups performed." units="requests" />
+        <event offset="155" counter="L2_WRITE_LOOKUP" title="L2 Cache Lookups" name="Write lookup" description="The number of L2 cache write lookups performed." units="requests" />
+        <event offset="156" advanced="yes" counter="L2_EXT_SNOOP_LOOKUP" title="L2 Cache Lookups" name="External snoop lookups" description="The number of coherency snoop lookups performed that were triggered by an external master." units="requests" />
+        <event offset="157" counter="L2_EXT_READ" title="External Bus Accesses" name="Read transaction" description="The number of external read transactions." units="transactions" />
+        <event offset="158" advanced="yes" counter="L2_EXT_READ_NOSNP" title="External Bus Accesses" name="ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" />
+        <event offset="159" advanced="yes" counter="L2_EXT_READ_UNIQUE" title="External Bus Accesses" name="ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" />
+        <event offset="160" counter="L2_EXT_READ_BEATS" title="External Bus Beats" name="Read beat" description="The number of external bus data read cycles." units="beats" />
+        <event offset="161" counter="L2_EXT_AR_STALL" title="External Bus Stalls" name="Read stall cycles" description="The number of cycles where a read is stalled waiting for the external bus." units="cycles" />
+        <event offset="162" counter="L2_EXT_AR_CNT_Q1" title="External Bus Outstanding Reads" name="0-25% outstanding" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" />
+        <event offset="163" counter="L2_EXT_AR_CNT_Q2" title="External Bus Outstanding Reads" name="25-50% outstanding" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" />
+        <event offset="164" counter="L2_EXT_AR_CNT_Q3" title="External Bus Outstanding Reads" name="50-75% outstanding" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" />
+        <event offset="165" counter="L2_EXT_RRESP_0_127" title="External Bus Read Latency" name="0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" />
+        <event offset="166" counter="L2_EXT_RRESP_128_191" title="External Bus Read Latency" name="128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" />
+        <event offset="167" counter="L2_EXT_RRESP_192_255" title="External Bus Read Latency" name="192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" />
+        <event offset="168" counter="L2_EXT_RRESP_256_319" title="External Bus Read Latency" name="256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" />
+        <event offset="169" counter="L2_EXT_RRESP_320_383" title="External Bus Read Latency" name="320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" />
+        <event offset="170" counter="L2_EXT_WRITE" title="External Bus Accesses" name="Write transaction" description="The number of external write transactions." units="transactions" />
+        <event offset="171" advanced="yes" counter="L2_EXT_WRITE_NOSNP_FULL" title="External Bus Accesses" name="WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" />
+        <event offset="172" advanced="yes" counter="L2_EXT_WRITE_NOSNP_PTL" title="External Bus Accesses" name="WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" />
+        <event offset="173" advanced="yes" counter="L2_EXT_WRITE_SNP_FULL" title="External Bus Accesses" name="WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" />
+        <event offset="174" advanced="yes" counter="L2_EXT_WRITE_SNP_PTL" title="External Bus Accesses" name="WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" />
+        <event offset="175" counter="L2_EXT_WRITE_BEATS" title="External Bus Beats" name="Write beat" description="The number of external bus data write cycles." units="beats" />
+        <event offset="176" counter="L2_EXT_W_STALL" title="External Bus Stalls" name="Write stall cycles" description="The number of cycles where a write is stalled waiting for the external bus." units="cycles" />
+        <event offset="177" counter="L2_EXT_AW_CNT_Q1" title="External Bus Outstanding Writes" name="0-25% outstanding" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" />
+        <event offset="178" counter="L2_EXT_AW_CNT_Q2" title="External Bus Outstanding Writes" name="25-50% outstanding" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" />
+        <event offset="179" counter="L2_EXT_AW_CNT_Q3" title="External Bus Outstanding Writes" name="50-75% outstanding" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" />
+        <event offset="180" advanced="yes" counter="L2_EXT_SNOOP" title="External Bus Accesses" name="Snoop transactions" description="The number of coherency snoops triggered by external masters." units="transactions" />
+        <event offset="181" advanced="yes" counter="L2_EXT_SNOOP_STALL" title="External Bus Stalls" name="Snoop stall cycles" description="The number of cycles where a coherency snoop triggered by an external master is stalled." units="cycles" />
+    </category>
+</metrics>
\ No newline at end of file
diff --git a/src/panfrost/perf/G76.xml b/src/panfrost/perf/G76.xml
new file mode 100644 (file)
index 0000000..d0b4436
--- /dev/null
@@ -0,0 +1,179 @@
+<!--
+Copyright © 2017-2020 ARM Limited.
+Copyright © 2021 Collabora, Ltd.
+Author: Antonio Caggiano <antonio.caggiano@collabora.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+<metrics id="TNOx">
+    <category name="Job Manager" per_cpu="no">
+        <event offset="6" counter="GPU_ACTIVE" title="GPU Cycles" name="GPU active" description="The number of cycles where the GPU has a workload of any type queued for processing." units="cycles" />
+        <event offset="7" advanced="yes" counter="IRQ_ACTIVE" title="GPU Cycles" name="Interrupt active" description="The number of cycles where the GPU has a pending interrupt." units="cycles" />
+        <event offset="8" advanced="yes" counter="JS0_JOBS" title="GPU Jobs" name="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs" />
+        <event offset="9" counter="JS0_TASKS" title="GPU Tasks" name="Fragment tasks" description="The number of 32x32 pixel tasks processed by the GPU fragment queue." units="tasks" />
+        <event offset="10" counter="JS0_ACTIVE" title="GPU Cycles" name="Fragment queue active" description="The number of cycles where work is queued for processing in the GPU fragment queue." units="cycles" />
+        <event offset="12" advanced="yes" counter="JS0_WAIT_READ" title="GPU Wait Cycles" name="Fragment descriptor read cycles" description="The number of cycles where queued fragment work is waiting for a descriptor load." units="cycles" />
+        <event offset="13" advanced="yes" counter="JS0_WAIT_ISSUE" title="GPU Wait Cycles" name="Fragment job issue cycles" description="The number of cycles where queued fragment work is waiting for an available processor." units="cycles" />
+        <event offset="14" advanced="yes" counter="JS0_WAIT_DEPEND" title="GPU Wait Cycles" name="Fragment job dependency cycles" description="The number of cycles where queued fragment work is waiting for dependent work to complete." units="cycles" />
+        <event offset="15" advanced="yes" counter="JS0_WAIT_FINISH" title="GPU Wait Cycles" name="Fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued fragment work to complete." units="cycles" />
+        <event offset="16" advanced="yes" counter="JS1_JOBS" title="GPU Jobs" name="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs" />
+        <event offset="17" advanced="yes" counter="JS1_TASKS" title="GPU Tasks" name="Non-fragment tasks" description="The number of tasks processed by the GPU non-fragment queue." units="tasks" />
+        <event offset="18" counter="JS1_ACTIVE" title="GPU Cycles" name="Non-fragment queue active" description="The number of cycles where work is queued in the GPU non-fragment queue." units="cycles" />
+        <event offset="20" advanced="yes" counter="JS1_WAIT_READ" title="GPU Wait Cycles" name="Non-fragment descriptor read cycles" description="The number of cycles where queued non-fragment work is waiting for a descriptor load." units="cycles" />
+        <event offset="21" advanced="yes" counter="JS1_WAIT_ISSUE" title="GPU Wait Cycles" name="Non-fragment job issue cycles" description="The number of cycles where queued non-fragment work is waiting for an available processor." units="cycles" />
+        <event offset="22" advanced="yes" counter="JS1_WAIT_DEPEND" title="GPU Wait Cycles" name="Non-fragment job dependency cycles" description="The number of cycles where queued non-fragment work is waiting for dependent work to complete." units="cycles" />
+        <event offset="23" advanced="yes" counter="JS1_WAIT_FINISH" title="GPU Wait Cycles" name="Non-fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued non-fragment work to complete." units="cycles" />
+        <event offset="24" advanced="yes" counter="JS2_JOBS" title="GPU Jobs" name="Reserved jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs" />
+        <event offset="25" advanced="yes" counter="JS2_TASKS" title="GPU Tasks" name="Reserved tasks" description="The number of tasks processed by the GPU reserved queue." units="tasks" />
+        <event offset="26" advanced="yes" counter="JS2_ACTIVE" title="GPU Cycles" name="Reserved queue active" description="The number of cycles where work is queued in the GPU reserved queue." units="cycles" />
+        <event offset="28" advanced="yes" counter="JS2_WAIT_READ" title="GPU Wait Cycles" name="Reserved descriptor read cycles" description="The number of cycles where queued reserved work is waiting for a descriptor load." units="cycles" />
+        <event offset="29" advanced="yes" counter="JS2_WAIT_ISSUE" title="GPU Wait Cycles" name="Reserved job issue cycles" description="The number of cycles where queued reserved work is waiting for an available processor." units="cycles" />
+        <event offset="30" advanced="yes" counter="JS2_WAIT_DEPEND" title="GPU Wait Cycles" name="Reserved job dependency cycles" description="The number of cycles where queued reserved work is waiting for dependent work to complete." units="cycles" />
+        <event offset="31" advanced="yes" counter="JS2_WAIT_FINISH" title="GPU Wait Cycles" name="Reserved job finish cycles" description="The number of cycles where the GPU is waiting for issued reserved work to complete." units="cycles" />
+    </category>
+    <category name="Tiler" per_cpu="no">
+        <event offset="68" counter="TILER_ACTIVE" title="GPU Cycles" name="Tiler active" description="The number of cycles where the tiler has a workload queued for processing." units="cycles" />
+        <event offset="70" counter="TRIANGLES" title="Input Primitives" name="Triangle primitives" description="The number of input triangle primitives." units="primitives" />
+        <event offset="71" counter="LINES" title="Input Primitives" name="Line primitives" description="The number of input line primitives." units="primitives" />
+        <event offset="72" counter="POINTS" title="Input Primitives" name="Point primitives" description="The number of input point primitives." units="primitives" />
+        <event offset="73" counter="FRONT_FACING" title="Visible Primitives" name="Front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" />
+        <event offset="74" counter="BACK_FACING" title="Visible Primitives" name="Back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" />
+        <event offset="75" counter="PRIM_VISIBLE" title="Primitive Culling" name="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" />
+        <event offset="76" counter="PRIM_CULLED" title="Primitive Culling" name="Facing and XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives" />
+        <event offset="77" counter="PRIM_CLIPPED" title="Primitive Culling" name="Z plane test culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives" />
+        <event offset="78" counter="PRIM_SAT_CULLED" title="Primitive Culling" name="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" />
+        <event offset="81" advanced="yes" counter="BUS_READ" title="Tiler L2 Accesses" name="Read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" />
+        <event offset="83" advanced="yes" counter="BUS_WRITE" title="Tiler L2 Accesses" name="Write beats" description="The number of internal bus data write cycles made by the tiler." units="beats" />
+        <event offset="85" counter="IDVS_POS_SHAD_REQ" title="Tiler Shading Requests" name="Position shading requests" description="The number of position shading requests in the IDVS flow." units="requests" />
+        <event offset="87" advanced="yes" counter="IDVS_POS_SHAD_STALL" title="Tiler Cycles" name="Position shading stall cycles" description="The number of cycles where the tiler has a stalled position shading request." units="cycles" />
+        <event offset="88" advanced="yes" counter="IDVS_POS_FIFO_FULL" title="Tiler Cycles" name="Position FIFO full cycles" description="The number of cycles where the tiler has a stalled position shading buffer." units="cycles" />
+        <event offset="90" advanced="yes" counter="VCACHE_HIT" title="Tiler Vertex Cache" name="Position cache hits" description="The number of position lookups that result in a hit in the vertex cache." units="requests" />
+        <event offset="91" advanced="yes" counter="VCACHE_MISS" title="Tiler Vertex Cache" name="Position cache misses" description="The number of position lookups that miss in the vertex cache." units="requests" />
+        <event offset="95" advanced="yes" counter="VFETCH_STALL" title="Tiler Cycles" name="Primitive assembly busy stall cycles" description="The number of cycles where the tiler is stalled waiting for primitive assembly." units="cycles" />
+        <event offset="98" advanced="yes" counter="IDVS_VBU_HIT" title="Tiler Vertex Cache" name="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" />
+        <event offset="99" advanced="yes" counter="IDVS_VBU_MISS" title="Tiler Vertex Cache" name="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests" />
+        <event offset="101" counter="IDVS_VAR_SHAD_REQ" title="Tiler Shading Requests" name="Varying shading requests" description="The number of varying shading requests in the IDVS flow." units="requests" />
+        <event offset="102" advanced="yes" counter="IDVS_VAR_SHAD_STALL" title="Tiler Cycles" name="Varying shading stall cycles" description="The number of cycles where the tiler has a stalled varying shading request." units="cycles" />
+        <event offset="118" advanced="yes" counter="WRBUF_NO_AXI_ID_STALL" title="Tiler Cycles" name="Write buffer transaction stall cycles" description="The number of cycles where the tiler write buffer can not send data because it has no available write IDs." units="cycles" />
+        <event offset="119" advanced="yes" counter="WRBUF_AXI_STALL" title="Tiler Cycles" name="Write buffer write stall cycles" description="The number of cycles where the tiler write buffer can not send data because the bus is not ready." units="cycles" />
+    </category>
+    <category name="Shader Core" per_cpu="no">
+        <event offset="196" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment active" description="The number of cycles where the shader core is processing a fragment workload." units="cycles" />
+        <event offset="197" advanced="yes" counter="FRAG_PRIMITIVES" title="Core Primitives" name="Read primitives" description="The number of primitives read from the tile list by the fragment front-end." units="primitives" />
+        <event offset="198" counter="FRAG_PRIM_RAST" title="Core Primitives" name="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" />
+        <event offset="199" counter="FRAG_FPK_ACTIVE" title="Core Cycles" name="Fragment FPKB active" description="The number of cycles where at least one quad is present in the pre-pipe quad queue." units="cycles" />
+        <event offset="201" counter="FRAG_WARPS" title="Core Warps" name="Fragment warps" description="The number of fragment warps created." units="warps" />
+        <event offset="202" counter="FRAG_PARTIAL_WARPS" title="Core Warps" name="Partial fragment warps" description="The number of fragment warps containing helper threads that do not correspond to a hit sample point." units="warps" />
+        <event offset="203" counter="FRAG_QUADS_RAST" title="Core Quads" name="Rasterized quads" description="The number of quads generated by the rasterization phase." units="quads" />
+        <event offset="204" counter="FRAG_QUADS_EZS_TEST" title="Core Quads" name="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" />
+        <event offset="205" counter="FRAG_QUADS_EZS_UPDATE" title="Core Quads" name="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" />
+        <event offset="206" counter="FRAG_QUADS_EZS_KILL" title="Core Quads" name="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" />
+        <event offset="207" counter="FRAG_LZS_TEST" title="Core Quads" name="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" />
+        <event offset="208" counter="FRAG_LZS_KILL" title="Core Quads" name="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" />
+        <event offset="209" counter="WARP_REG_SIZE_64" title="Core Warps" name="All register warps" description="The number of warps that require more than 32 registers." units="warps" />
+        <event offset="210" counter="FRAG_PTILES" title="Core Tiles" name="Tiles" description="The number of tiles processed by the shader core." units="tiles" />
+        <event offset="211" counter="FRAG_TRANS_ELIM" title="Core Tiles" name="Constant tiles killed" description="The number of tiles killed by transaction elimination." units="tiles" />
+        <event offset="212" counter="QUAD_FPK_KILLER" title="Core Quads" name="FPK occluder quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" />
+        <event offset="213" counter="FULL_QUAD_WARPS" title="Core Warps" name="Full quad warps" description="The number of warps that are fully populated with quads." units="warps" />
+        <event offset="214" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Non-fragment active" description="The number of cycles where the shader core is processing some non-fragment workload." units="cycles" />
+        <event offset="215" advanced="yes" counter="COMPUTE_TASKS" title="Core Tasks" name="Non-fragment tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks" />
+        <event offset="216" counter="COMPUTE_WARPS" title="Core Warps" name="Non-fragment warps" description="The number of non-fragment warps created." units="warps" />
+        <event offset="217" advanced="yes" counter="COMPUTE_STARVING" title="Core Starvation Cycles" name="Non-fragment starvation cycles" description="The number of cycles where the shader core is processing a non-fragment workload and there are no new threads available for execution." units="cycles" />
+        <event offset="218" counter="EXEC_CORE_ACTIVE" title="Core Cycles" name="Execution core active" description="The number of cycles where the shader core is processing at least one warp." units="cycles" />
+        <event offset="219" advanced="yes" counter="EXEC_ACTIVE" title="Core Cycles" name="Execution engine active" description="The number of cycles where the execution engine unit is processing at least one thread." units="cycles" />
+        <event offset="220" counter="EXEC_INSTR_COUNT" title="Core EE Instructions" name="Executed instructions" description="The number of instructions executed per warp." units="instructions" />
+        <event offset="221" counter="EXEC_INSTR_DIVERGED" title="Core EE Instructions" name="Diverged instructions" description="The number of instructions executed per warp, that have control flow divergence." units="instructions" />
+        <event offset="222" advanced="yes" counter="EXEC_INSTR_STARVING" title="Core Starvation Cycles" name="Execution engine starvation cycles" description="The number of cycles where no new threads are available for execution." units="cycles" />
+        <event offset="223" advanced="yes" counter="ARITH_INSTR_SINGLE_FMA" title="Core EE Instructions" name="Arithmetic instructions" description="The number of instructions where the workload is a single FMA pipe arithmetic operation." units="instructions" />
+        <event offset="224" advanced="yes" counter="ARITH_INSTR_DOUBLE" title="Core EE Instructions" name="Dual Arithmetic instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe arithmetic operation." units="instructions" />
+        <event offset="225" advanced="yes" counter="ARITH_INSTR_MSG" title="Core EE Instructions" name="Arithmetic + Message instructions" description="The number of instructions where the workload is one FMA pipe arithmetic operation and one ADD pipe message operation." units="instructions" />
+        <event offset="226" advanced="yes" counter="ARITH_INSTR_MSG_ONLY" title="Core EE Instructions" name="Message instructions" description="The number of instructions where the workload is a single ADD pipe message operation, with no FMA pipe operation." units="instructions" />
+        <event offset="227" counter="TEX_MSGI_NUM_QUADS" title="Core Texture Quads" name="Texture requests" description="The number of quad-width texture operations processed by the texture unit." units="quads" />
+        <event offset="228" counter="TEX_DFCH_NUM_PASSES" title="Core Texture Quads" name="Texture issues" description="The number of quad-width filtering passes." units="issues" />
+        <event offset="229" counter="TEX_DFCH_NUM_PASSES_MISS" title="Core Texture Quads" name="Descriptor misses" description="The number of quad-width filtering passes that miss in the resource or sampler descriptor cache." units="requests" />
+        <event offset="230" counter="TEX_DFCH_NUM_PASSES_MIP_MAP" title="Core Texture Quads" name="Mipmapped texture issues" description="The number of quad-width filtering passes that use a mipmapped texture." units="issues" />
+        <event offset="231" counter="TEX_TIDX_NUM_SPLIT_MIP_MAP" title="Core Texture Quads" name="Trilinear filtered issues" description="The number of quad-width filtering passes that use a trilinear filter." units="issues" />
+        <event offset="232" counter="TEX_TFCH_NUM_LINES_FETCHED" title="Core Texture Line Fetches" name="Line fetches" description="The number of texture line fetches from the L2 cache." units="issues" />
+        <event offset="233" counter="TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED" title="Core Texture Line Fetches" name="Compressed line fetches" description="The number of texture line fetches from the L2 cache that are block compressed textures." units="issues" />
+        <event offset="234" counter="TEX_TFCH_NUM_OPERATIONS" title="Core Texture Cycles" name="Cache lookups" description="The number of texture cache lookup cycles." units="requests" />
+        <event offset="235" counter="TEX_FILT_NUM_OPERATIONS" title="Core Texture Cycles" name="Texturing active" description="The number of texture filtering issue cycles." units="cycles" />
+        <event offset="236" counter="LS_MEM_READ_FULL" title="Core Load/Store Cycles" name="Full read cycles" description="The number of full-width load/store cache reads." units="cycles" />
+        <event offset="237" counter="LS_MEM_READ_SHORT" title="Core Load/Store Cycles" name="Partial read cycles" description="The number of partial-width load/store cache reads." units="cycles" />
+        <event offset="238" counter="LS_MEM_WRITE_FULL" title="Core Load/Store Cycles" name="Full write cycles" description="The number of full-width load/store cache writes." units="cycles" />
+        <event offset="239" counter="LS_MEM_WRITE_SHORT" title="Core Load/Store Cycles" name="Partial write cycles" description="The number of partial-width load/store cache writes." units="cycles" />
+        <event offset="240" counter="LS_MEM_ATOMIC" title="Core Load/Store Cycles" name="Atomic access cycles" description="The number of load/store atomic accesses." units="cycles" />
+        <event offset="241" counter="VARY_INSTR" title="Core Varying Requests" name="Interpolation requests" description="The number of warp-width interpolation operations processed by the varying unit." units="instructions" />
+        <event offset="242" counter="VARY_SLOT_32" title="Core Varying Cycles" name="32-bit interpolation active" description="The number of 32-bit interpolation cycles processed by the varying unit." units="cycles" />
+        <event offset="243" counter="VARY_SLOT_16" title="Core Varying Cycles" name="16-bit interpolation active" description="The number of 16-bit interpolation cycles processed by the varying unit." units="cycles" />
+        <event offset="244" advanced="yes" counter="ATTR_INSTR" title="Core Attribute Requests" name="Attribute requests" description="The number of instructions executed by the attribute unit." units="instructions" />
+        <event offset="245" advanced="yes" counter="ARITH_INSTR_FP_MUL" title="Core EE Instructions" name="Multiplier instructions" description="The number of instructions where the workload uses floating-point multiplier hardware." units="instructions" />
+        <event offset="246" counter="BEATS_RD_FTC" title="Core L2 Reads" name="Fragment L2 read beats" description="The number of read beats received by the fixed-function fragment front-end." units="beats" />
+        <event offset="247" counter="BEATS_RD_FTC_EXT" title="Core L2 Reads" name="Fragment external read beats" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="248" counter="BEATS_RD_LSC" title="Core L2 Reads" name="Load/store L2 read beats" description="The number of read beats received by the load/store unit." units="beats" />
+        <event offset="249" counter="BEATS_RD_LSC_EXT" title="Core L2 Reads" name="Load/store external read beats" description="The number of read beats received by the load/store unit that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="250" counter="BEATS_RD_TEX" title="Core L2 Reads" name="Texture L2 read beats" description="The number of read beats received by the texture unit." units="beats" />
+        <event offset="251" counter="BEATS_RD_TEX_EXT" title="Core L2 Reads" name="Texture external read beats" description="The number of read beats received by the texture unit that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="252" advanced="yes" counter="BEATS_RD_OTHER" title="Core L2 Reads" name="Other L2 read beats" description="The number of read beats received by a unit that is not specifically identified." units="beats" />
+        <event offset="253" counter="BEATS_WR_LSC_OTHER" title="Core Writes" name="Load/store other write beats" description="The number of write beats by the load/store unit that are due to any reason other than writeback." units="beats" />
+        <event offset="254" counter="BEATS_WR_TIB" title="Core Writes" name="Tile buffer write beats" description="The number of write beats sent by the tile buffer writeback unit." units="beats" />
+        <event offset="255" counter="BEATS_WR_LSC_WB" title="Core Writes" name="Load/store writeback write beats" description="The number of write beats by the load/store unit that are due to writeback." units="beats" />
+    </category>
+    <category name="Memory System" per_cpu="no">
+        <event offset="132" advanced="yes" counter="MMU_REQUESTS" title="MMU Stage 1 Translations" name="MMU lookups" description="The number of main MMU address translations performed." units="requests" />
+        <event offset="144" advanced="yes" counter="L2_RD_MSG_IN" title="L2 Cache Requests" name="Read requests" description="The number of L2 cache read requests from internal masters." units="requests" />
+        <event offset="145" advanced="yes" counter="L2_RD_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Read stall cycles" description="The number of cycles where L2 cache read requests from internal masters are stalled." units="cycles" />
+        <event offset="146" advanced="yes" counter="L2_WR_MSG_IN" title="L2 Cache Requests" name="Write requests" description="The number of L2 cache write requests from internal masters." units="requests" />
+        <event offset="147" advanced="yes" counter="L2_WR_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Write stall cycles" description="The number of cycles where L2 cache write requests from internal masters are stalled." units="cycles" />
+        <event offset="148" advanced="yes" counter="L2_SNP_MSG_IN" title="L2 Cache Requests" name="Snoop requests" description="The number of L2 snoop requests from internal masters." units="requests" />
+        <event offset="149" advanced="yes" counter="L2_SNP_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Snoop stall cycles" description="The number of cycles where L2 cache snoop requests from internal masters are stalled." units="cycles" />
+        <event offset="150" advanced="yes" counter="L2_RD_MSG_OUT" title="L2 Cache Requests" name="L1 read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal master." units="requests" />
+        <event offset="151" advanced="yes" counter="L2_RD_MSG_OUT_STALL" title="L2 Cache Stall Cycles" name="L1 read stall cycles" description="The number of cycles where L1 cache read requests sent by the L2 cache to an internal master are stalled." units="cycles" />
+        <event offset="152" advanced="yes" counter="L2_WR_MSG_OUT" title="L2 Cache Requests" name="L1 write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal master." units="requests" />
+        <event offset="153" counter="L2_ANY_LOOKUP" title="L2 Cache Lookups" name="Any lookup" description="The number of L2 cache lookups performed." units="requests" />
+        <event offset="154" counter="L2_READ_LOOKUP" title="L2 Cache Lookups" name="Read lookup" description="The number of L2 cache read lookups performed." units="requests" />
+        <event offset="155" counter="L2_WRITE_LOOKUP" title="L2 Cache Lookups" name="Write lookup" description="The number of L2 cache write lookups performed." units="requests" />
+        <event offset="156" advanced="yes" counter="L2_EXT_SNOOP_LOOKUP" title="L2 Cache Lookups" name="External snoop lookups" description="The number of coherency snoop lookups performed that were triggered by an external master." units="requests" />
+        <event offset="157" counter="L2_EXT_READ" title="External Bus Accesses" name="Read transaction" description="The number of external read transactions." units="transactions" />
+        <event offset="158" advanced="yes" counter="L2_EXT_READ_NOSNP" title="External Bus Accesses" name="ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" />
+        <event offset="159" advanced="yes" counter="L2_EXT_READ_UNIQUE" title="External Bus Accesses" name="ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" />
+        <event offset="160" counter="L2_EXT_READ_BEATS" title="External Bus Beats" name="Read beat" description="The number of external bus data read cycles." units="beats" />
+        <event offset="161" counter="L2_EXT_AR_STALL" title="External Bus Stalls" name="Read stall cycles" description="The number of cycles where a read is stalled waiting for the external bus." units="cycles" />
+        <event offset="162" counter="L2_EXT_AR_CNT_Q1" title="External Bus Outstanding Reads" name="0-25% outstanding" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" />
+        <event offset="163" counter="L2_EXT_AR_CNT_Q2" title="External Bus Outstanding Reads" name="25-50% outstanding" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" />
+        <event offset="164" counter="L2_EXT_AR_CNT_Q3" title="External Bus Outstanding Reads" name="50-75% outstanding" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" />
+        <event offset="165" counter="L2_EXT_RRESP_0_127" title="External Bus Read Latency" name="0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" />
+        <event offset="166" counter="L2_EXT_RRESP_128_191" title="External Bus Read Latency" name="128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" />
+        <event offset="167" counter="L2_EXT_RRESP_192_255" title="External Bus Read Latency" name="192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" />
+        <event offset="168" counter="L2_EXT_RRESP_256_319" title="External Bus Read Latency" name="256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" />
+        <event offset="169" counter="L2_EXT_RRESP_320_383" title="External Bus Read Latency" name="320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" />
+        <event offset="170" counter="L2_EXT_WRITE" title="External Bus Accesses" name="Write transaction" description="The number of external write transactions." units="transactions" />
+        <event offset="171" advanced="yes" counter="L2_EXT_WRITE_NOSNP_FULL" title="External Bus Accesses" name="WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" />
+        <event offset="172" advanced="yes" counter="L2_EXT_WRITE_NOSNP_PTL" title="External Bus Accesses" name="WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" />
+        <event offset="173" advanced="yes" counter="L2_EXT_WRITE_SNP_FULL" title="External Bus Accesses" name="WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" />
+        <event offset="174" advanced="yes" counter="L2_EXT_WRITE_SNP_PTL" title="External Bus Accesses" name="WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" />
+        <event offset="175" counter="L2_EXT_WRITE_BEATS" title="External Bus Beats" name="Write beat" description="The number of external bus data write cycles." units="beats" />
+        <event offset="176" counter="L2_EXT_W_STALL" title="External Bus Stalls" name="Write stall cycles" description="The number of cycles where a write is stalled waiting for the external bus." units="cycles" />
+        <event offset="177" counter="L2_EXT_AW_CNT_Q1" title="External Bus Outstanding Writes" name="0-25% outstanding" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" />
+        <event offset="178" counter="L2_EXT_AW_CNT_Q2" title="External Bus Outstanding Writes" name="25-50% outstanding" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" />
+        <event offset="179" counter="L2_EXT_AW_CNT_Q3" title="External Bus Outstanding Writes" name="50-75% outstanding" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" />
+        <event offset="180" advanced="yes" counter="L2_EXT_SNOOP" title="External Bus Accesses" name="Snoop transactions" description="The number of coherency snoops triggered by external masters." units="transactions" />
+        <event offset="181" advanced="yes" counter="L2_EXT_SNOOP_STALL" title="External Bus Stalls" name="Snoop stall cycles" description="The number of cycles where a coherency snoop triggered by an external master is stalled." units="cycles" />
+    </category>
+</metrics>
\ No newline at end of file
diff --git a/src/panfrost/perf/G77.xml b/src/panfrost/perf/G77.xml
new file mode 100644 (file)
index 0000000..60d9919
--- /dev/null
@@ -0,0 +1,179 @@
+<!--
+Copyright © 2017-2020 ARM Limited.
+Copyright © 2021 Collabora, Ltd.
+Author: Antonio Caggiano <antonio.caggiano@collabora.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+<metrics id="TTRx">
+    <category name="Job Manager" per_cpu="no">
+        <event offset="6" counter="GPU_ACTIVE" title="GPU Cycles" name="GPU active" description="The number of cycles where the GPU has a workload of any type queued for processing." units="cycles" />
+        <event offset="7" advanced="yes" counter="IRQ_ACTIVE" title="GPU Cycles" name="Interrupt active" description="The number of cycles where the GPU has a pending interrupt." units="cycles" />
+        <event offset="8" advanced="yes" counter="JS0_JOBS" title="GPU Jobs" name="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs" />
+        <event offset="9" counter="JS0_TASKS" title="GPU Tasks" name="Fragment tasks" description="The number of 32x32 pixel tasks processed by the GPU fragment queue." units="tasks" />
+        <event offset="10" counter="JS0_ACTIVE" title="GPU Cycles" name="Fragment queue active" description="The number of cycles where work is queued for processing in the GPU fragment queue." units="cycles" />
+        <event offset="12" advanced="yes" counter="JS0_WAIT_READ" title="GPU Wait Cycles" name="Fragment descriptor read cycles" description="The number of cycles where queued fragment work is waiting for a descriptor load." units="cycles" />
+        <event offset="13" advanced="yes" counter="JS0_WAIT_ISSUE" title="GPU Wait Cycles" name="Fragment job issue cycles" description="The number of cycles where queued fragment work is waiting for an available processor." units="cycles" />
+        <event offset="14" advanced="yes" counter="JS0_WAIT_DEPEND" title="GPU Wait Cycles" name="Fragment job dependency cycles" description="The number of cycles where queued fragment work is waiting for dependent work to complete." units="cycles" />
+        <event offset="15" advanced="yes" counter="JS0_WAIT_FINISH" title="GPU Wait Cycles" name="Fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued fragment work to complete." units="cycles" />
+        <event offset="16" advanced="yes" counter="JS1_JOBS" title="GPU Jobs" name="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs" />
+        <event offset="17" advanced="yes" counter="JS1_TASKS" title="GPU Tasks" name="Non-fragment tasks" description="The number of tasks processed by the GPU non-fragment queue." units="tasks" />
+        <event offset="18" counter="JS1_ACTIVE" title="GPU Cycles" name="Non-fragment queue active" description="The number of cycles where work is queued in the GPU non-fragment queue." units="cycles" />
+        <event offset="20" advanced="yes" counter="JS1_WAIT_READ" title="GPU Wait Cycles" name="Non-fragment descriptor read cycles" description="The number of cycles where queued non-fragment work is waiting for a descriptor load." units="cycles" />
+        <event offset="21" advanced="yes" counter="JS1_WAIT_ISSUE" title="GPU Wait Cycles" name="Non-fragment job issue cycles" description="The number of cycles where queued non-fragment work is waiting for an available processor." units="cycles" />
+        <event offset="22" advanced="yes" counter="JS1_WAIT_DEPEND" title="GPU Wait Cycles" name="Non-fragment job dependency cycles" description="The number of cycles where queued non-fragment work is waiting for dependent work to complete." units="cycles" />
+        <event offset="23" advanced="yes" counter="JS1_WAIT_FINISH" title="GPU Wait Cycles" name="Non-fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued non-fragment work to complete." units="cycles" />
+        <event offset="24" advanced="yes" counter="JS2_JOBS" title="GPU Jobs" name="Reserved jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs" />
+        <event offset="25" advanced="yes" counter="JS2_TASKS" title="GPU Tasks" name="Reserved tasks" description="The number of tasks processed by the GPU reserved queue." units="tasks" />
+        <event offset="26" advanced="yes" counter="JS2_ACTIVE" title="GPU Cycles" name="Reserved queue active" description="The number of cycles where work is queued in the GPU reserved queue." units="cycles" />
+        <event offset="28" advanced="yes" counter="JS2_WAIT_READ" title="GPU Wait Cycles" name="Reserved descriptor read cycles" description="The number of cycles where queued reserved work is waiting for a descriptor load." units="cycles" />
+        <event offset="29" advanced="yes" counter="JS2_WAIT_ISSUE" title="GPU Wait Cycles" name="Reserved job issue cycles" description="The number of cycles where queued reserved work is waiting for an available processor." units="cycles" />
+        <event offset="30" advanced="yes" counter="JS2_WAIT_DEPEND" title="GPU Wait Cycles" name="Reserved job dependency cycles" description="The number of cycles where queued reserved work is waiting for dependent work to complete." units="cycles" />
+        <event offset="31" advanced="yes" counter="JS2_WAIT_FINISH" title="GPU Wait Cycles" name="Reserved job finish cycles" description="The number of cycles where the GPU is waiting for issued reserved work to complete." units="cycles" />
+    </category>
+    <category name="Memory System" per_cpu="no">
+        <event offset="196" advanced="yes" counter="MMU_REQUESTS" title="MMU Stage 1 Translations" name="MMU lookups" description="The number of main MMU address translations performed." units="requests" />
+        <event offset="208" advanced="yes" counter="L2_RD_MSG_IN" title="L2 Cache Requests" name="Read requests" description="The number of L2 cache read requests from internal masters." units="requests" />
+        <event offset="209" advanced="yes" counter="L2_RD_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Read stall cycles" description="The number of cycles where L2 cache read requests from internal masters are stalled." units="cycles" />
+        <event offset="210" advanced="yes" counter="L2_WR_MSG_IN" title="L2 Cache Requests" name="Write requests" description="The number of L2 cache write requests from internal masters." units="requests" />
+        <event offset="211" advanced="yes" counter="L2_WR_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Write stall cycles" description="The number of cycles where L2 cache write requests from internal masters are stalled." units="cycles" />
+        <event offset="212" advanced="yes" counter="L2_SNP_MSG_IN" title="L2 Cache Requests" name="Snoop requests" description="The number of L2 snoop requests from internal masters." units="requests" />
+        <event offset="213" advanced="yes" counter="L2_SNP_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Snoop stall cycles" description="The number of cycles where L2 cache snoop requests from internal masters are stalled." units="cycles" />
+        <event offset="214" advanced="yes" counter="L2_RD_MSG_OUT" title="L2 Cache Requests" name="L1 read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal master." units="requests" />
+        <event offset="215" advanced="yes" counter="L2_RD_MSG_OUT_STALL" title="L2 Cache Stall Cycles" name="L1 read stall cycles" description="The number of cycles where L1 cache read requests sent by the L2 cache to an internal master are stalled." units="cycles" />
+        <event offset="216" advanced="yes" counter="L2_WR_MSG_OUT" title="L2 Cache Requests" name="L1 write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal master." units="requests" />
+        <event offset="217" counter="L2_ANY_LOOKUP" title="L2 Cache Lookups" name="Any lookup" description="The number of L2 cache lookups performed." units="requests" />
+        <event offset="218" counter="L2_READ_LOOKUP" title="L2 Cache Lookups" name="Read lookup" description="The number of L2 cache read lookups performed." units="requests" />
+        <event offset="219" counter="L2_WRITE_LOOKUP" title="L2 Cache Lookups" name="Write lookup" description="The number of L2 cache write lookups performed." units="requests" />
+        <event offset="220" advanced="yes" counter="L2_EXT_SNOOP_LOOKUP" title="L2 Cache Lookups" name="External snoop lookups" description="The number of coherency snoop lookups performed that were triggered by an external master." units="requests" />
+        <event offset="221" counter="L2_EXT_READ" title="External Bus Accesses" name="Read transaction" description="The number of external read transactions." units="transactions" />
+        <event offset="222" advanced="yes" counter="L2_EXT_READ_NOSNP" title="External Bus Accesses" name="ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" />
+        <event offset="223" advanced="yes" counter="L2_EXT_READ_UNIQUE" title="External Bus Accesses" name="ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" />
+        <event offset="224" counter="L2_EXT_READ_BEATS" title="External Bus Beats" name="Read beat" description="The number of external bus data read cycles." units="beats" />
+        <event offset="225" counter="L2_EXT_AR_STALL" title="External Bus Stalls" name="Read stall cycles" description="The number of cycles where a read is stalled waiting for the external bus." units="cycles" />
+        <event offset="226" counter="L2_EXT_AR_CNT_Q1" title="External Bus Outstanding Reads" name="0-25% outstanding" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" />
+        <event offset="227" counter="L2_EXT_AR_CNT_Q2" title="External Bus Outstanding Reads" name="25-50% outstanding" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" />
+        <event offset="228" counter="L2_EXT_AR_CNT_Q3" title="External Bus Outstanding Reads" name="50-75% outstanding" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" />
+        <event offset="229" counter="L2_EXT_RRESP_0_127" title="External Bus Read Latency" name="0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" />
+        <event offset="230" counter="L2_EXT_RRESP_128_191" title="External Bus Read Latency" name="128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" />
+        <event offset="231" counter="L2_EXT_RRESP_192_255" title="External Bus Read Latency" name="192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" />
+        <event offset="232" counter="L2_EXT_RRESP_256_319" title="External Bus Read Latency" name="256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" />
+        <event offset="233" counter="L2_EXT_RRESP_320_383" title="External Bus Read Latency" name="320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" />
+        <event offset="234" counter="L2_EXT_WRITE" title="External Bus Accesses" name="Write transaction" description="The number of external write transactions." units="transactions" />
+        <event offset="235" advanced="yes" counter="L2_EXT_WRITE_NOSNP_FULL" title="External Bus Accesses" name="WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" />
+        <event offset="236" advanced="yes" counter="L2_EXT_WRITE_NOSNP_PTL" title="External Bus Accesses" name="WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" />
+        <event offset="237" advanced="yes" counter="L2_EXT_WRITE_SNP_FULL" title="External Bus Accesses" name="WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" />
+        <event offset="238" advanced="yes" counter="L2_EXT_WRITE_SNP_PTL" title="External Bus Accesses" name="WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" />
+        <event offset="239" counter="L2_EXT_WRITE_BEATS" title="External Bus Beats" name="Write beat" description="The number of external bus data write cycles." units="beats" />
+        <event offset="240" counter="L2_EXT_W_STALL" title="External Bus Stalls" name="Write stall cycles" description="The number of cycles where a write is stalled waiting for the external bus." units="cycles" />
+        <event offset="241" counter="L2_EXT_AW_CNT_Q1" title="External Bus Outstanding Writes" name="0-25% outstanding" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" />
+        <event offset="242" counter="L2_EXT_AW_CNT_Q2" title="External Bus Outstanding Writes" name="25-50% outstanding" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" />
+        <event offset="243" counter="L2_EXT_AW_CNT_Q3" title="External Bus Outstanding Writes" name="50-75% outstanding" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" />
+        <event offset="244" advanced="yes" counter="L2_EXT_SNOOP" title="External Bus Accesses" name="Snoop transactions" description="The number of coherency snoops triggered by external masters." units="transactions" />
+        <event offset="245" advanced="yes" counter="L2_EXT_SNOOP_STALL" title="External Bus Stalls" name="Snoop stall cycles" description="The number of cycles where a coherency snoop triggered by an external master is stalled." units="cycles" />
+    </category>
+    <category name="Shader Core" per_cpu="no">
+        <event offset="196" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment active" description="The number of cycles where the shader core is processing a fragment workload." units="cycles" />
+        <event offset="197" advanced="yes" counter="FRAG_PRIMITIVES_OUT" title="Core Primitives" name="Read primitives" description="The number of primitives read from the tile list by the fragment front-end." units="primitives" />
+        <event offset="198" counter="FRAG_PRIM_RAST" title="Core Primitives" name="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" />
+        <event offset="199" counter="FRAG_FPK_ACTIVE" title="Core Cycles" name="Fragment FPKB active" description="The number of cycles where at least one quad is present in the pre-pipe quad queue." units="cycles" />
+        <event offset="201" counter="FRAG_WARPS" title="Core Warps" name="Fragment warps" description="The number of fragment warps created." units="warps" />
+        <event offset="202" counter="FRAG_PARTIAL_QUADS_RAST" title="Core Quads" name="Partial rasterized quads" description="The number of partially-rasterized fragment quads created." units="quads" />
+        <event offset="203" counter="FRAG_QUADS_RAST" title="Core Quads" name="Rasterized quads" description="The number of quads generated by the rasterization phase." units="quads" />
+        <event offset="204" counter="FRAG_QUADS_EZS_TEST" title="Core Quads" name="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" />
+        <event offset="205" counter="FRAG_QUADS_EZS_UPDATE" title="Core Quads" name="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" />
+        <event offset="206" counter="FRAG_QUADS_EZS_KILL" title="Core Quads" name="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" />
+        <event offset="207" counter="FRAG_LZS_TEST" title="Core Quads" name="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" />
+        <event offset="208" counter="FRAG_LZS_KILL" title="Core Quads" name="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" />
+        <event offset="209" counter="WARP_REG_SIZE_64" title="Core Warps" name="All register warps" description="The number of warps that require more than 32 registers." units="warps" />
+        <event offset="210" counter="FRAG_PTILES" title="Core Tiles" name="Tiles" description="The number of tiles processed by the shader core." units="tiles" />
+        <event offset="211" counter="FRAG_TRANS_ELIM" title="Core Tiles" name="Constant tiles killed" description="The number of tiles killed by transaction elimination." units="tiles" />
+        <event offset="212" counter="QUAD_FPK_KILLER" title="Core Quads" name="FPK occluder quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" />
+        <event offset="213" counter="FULL_QUAD_WARPS" title="Core Warps" name="Full quad warps" description="The number of warps that are fully populated with quads." units="warps" />
+        <event offset="214" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Non-fragment active" description="The number of cycles where the shader core is processing some non-fragment workload." units="cycles" />
+        <event offset="215" advanced="yes" counter="COMPUTE_TASKS" title="Core Tasks" name="Non-fragment tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks" />
+        <event offset="216" counter="COMPUTE_WARPS" title="Core Warps" name="Non-fragment warps" description="The number of non-fragment warps created." units="warps" />
+        <event offset="217" advanced="yes" counter="COMPUTE_STARVING" title="Core Starvation Cycles" name="Non-fragment starvation cycles" description="The number of cycles where the shader core is processing a non-fragment workload and there are no new threads available for execution." units="cycles" />
+        <event offset="218" counter="EXEC_CORE_ACTIVE" title="Core Cycles" name="Execution core active" description="The number of cycles where the shader core is processing at least one warp." units="cycles" />
+        <event offset="219" counter="EXEC_INSTR_FMA" title="Core PU Instructions" name="FMA instructions" description="The number of instructions issued to the FMA pipe." units="instructions" />
+        <event offset="220" counter="EXEC_INSTR_CVT" title="Core PU Instructions" name="CVT instructions" description="The number of instructions issued to the CVT pipe." units="instructions" />
+        <event offset="221" counter="EXEC_INSTR_SFU" title="Core PU Instructions" name="SFU instructions" description="The number of instructions issued to the SFU pipe." units="instructions" />
+        <event offset="222" counter="EXEC_INSTR_MSG" title="Core PU Instructions" name="Message instructions" description="The number of instructions issued to the MSG pipe." units="instructions" />
+        <event offset="223" counter="EXEC_INSTR_DIVERGED" title="Core EE Instructions" name="Diverged instructions" description="The number of instructions executed per warp, that have control flow divergence." units="instructions" />
+        <event offset="224" advanced="yes" counter="EXEC_ICACHE_MISS" title="Core PU Instructions" name="Instruction cache misses" description="The number of instruction cache misses." units="requests" />
+        <event offset="225" advanced="yes" counter="EXEC_STARVE_ARITH" title="Core Starvation Cycles" name="Execution engine starvation cycles" description="The number of cycles where the processing unit is starved of work." units="cycles" />
+        <event offset="226" counter="CALL_BLEND_SHADER" title="Core PU Instructions" name="Blend shader calls" description="The number of blend shader invocations executed." units="instructions" />
+        <event offset="227" counter="TEX_MSGI_NUM_FLITS" title="Texture Bus" name="Input beats" description="The number of texture request message data beats." units="beats" />
+        <event offset="228" counter="TEX_DFCH_CLK_STALLED" title="Core Texture Stalls" name="Descriptor stall cycles" description="The number of cycles where a quad is stalled on texture descriptor fetch." units="cycles" />
+        <event offset="229" counter="TEX_TFCH_CLK_STALLED" title="Core Texture Stalls" name="Fetch queue stall cycles" description="The number of cycles where a quad is stalled on entering texture fetch because the fetch queue is full." units="cycles" />
+        <event offset="230" counter="TEX_TFCH_STARVED_PENDING_DATA_FETCH" title="Core Texture Stalls" name="Filtering unit stall cycles" description="The number of cycles where the filtering unit is idle and there is at least one quad present in the texture data fetch queue." units="cycles" />
+        <event offset="231" counter="TEX_FILT_NUM_OPERATIONS" title="Core Texture Cycles" name="Texturing active" description="The number of texture filtering issue cycles." units="cycles" />
+        <event offset="232" counter="TEX_FILT_NUM_FXR_OPERATIONS" title="Core Texture Cycles" name="4x bilinear filtering active" description="The number of cycles where the filtering unit uses the 4x path to implement nearest or bilinear filtering." units="cycles" />
+        <event offset="233" counter="TEX_FILT_NUM_FST_OPERATIONS" title="Core Texture Cycles" name="2x trilinear filtering active" description="The number of cycles where the filtering unit uses the 4x path to implement trilinear filtering." units="cycles" />
+        <event offset="234" counter="TEX_MSGO_NUM_MSG" title="Core Texture Quads" name="Texture requests" description="The number of quad-width texture operations processed by the texture unit." units="quads" />
+        <event offset="235" counter="TEX_MSGO_NUM_FLITS" title="Texture Bus" name="Output beats" description="The number of texture response message data beats." units="beats" />
+        <event offset="236" counter="LS_MEM_READ_FULL" title="Core Load/Store Cycles" name="Full read cycles" description="The number of full-width load/store cache reads." units="cycles" />
+        <event offset="237" counter="LS_MEM_READ_SHORT" title="Core Load/Store Cycles" name="Partial read cycles" description="The number of partial-width load/store cache reads." units="cycles" />
+        <event offset="238" counter="LS_MEM_WRITE_FULL" title="Core Load/Store Cycles" name="Full write cycles" description="The number of full-width load/store cache writes." units="cycles" />
+        <event offset="239" counter="LS_MEM_WRITE_SHORT" title="Core Load/Store Cycles" name="Partial write cycles" description="The number of partial-width load/store cache writes." units="cycles" />
+        <event offset="240" counter="LS_MEM_ATOMIC" title="Core Load/Store Cycles" name="Atomic access cycles" description="The number of load/store atomic accesses." units="cycles" />
+        <event offset="241" counter="VARY_INSTR" title="Core Varying Requests" name="Interpolation requests" description="The number of warp-width interpolation operations processed by the varying unit." units="instructions" />
+        <event offset="242" counter="VARY_SLOT_32" title="Core Varying Cycles" name="32-bit interpolation active" description="The number of 32-bit interpolation cycles processed by the varying unit." units="cycles" />
+        <event offset="243" counter="VARY_SLOT_16" title="Core Varying Cycles" name="16-bit interpolation active" description="The number of 16-bit interpolation cycles processed by the varying unit." units="cycles" />
+        <event offset="244" advanced="yes" counter="ATTR_INSTR" title="Core Attribute Requests" name="Attribute requests" description="The number of instructions executed by the attribute unit." units="instructions" />
+        <event offset="245" advanced="yes" counter="ARITH_INSTR_FP_MUL" title="Core EE Instructions" name="Multiplier instructions" description="The number of instructions where the workload uses floating-point multiplier hardware." units="instructions" />
+        <event offset="246" counter="BEATS_RD_FTC" title="Core L2 Reads" name="Fragment L2 read beats" description="The number of read beats received by the fixed-function fragment front-end." units="beats" />
+        <event offset="247" counter="BEATS_RD_FTC_EXT" title="Core L2 Reads" name="Fragment external read beats" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="248" counter="BEATS_RD_LSC" title="Core L2 Reads" name="Load/store L2 read beats" description="The number of read beats received by the load/store unit." units="beats" />
+        <event offset="249" counter="BEATS_RD_LSC_EXT" title="Core L2 Reads" name="Load/store external read beats" description="The number of read beats received by the load/store unit that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="250" counter="BEATS_RD_TEX" title="Core L2 Reads" name="Texture L2 read beats" description="The number of read beats received by the texture unit." units="beats" />
+        <event offset="251" counter="BEATS_RD_TEX_EXT" title="Core L2 Reads" name="Texture external read beats" description="The number of read beats received by the texture unit that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="252" advanced="yes" counter="BEATS_RD_OTHER" title="Core L2 Reads" name="Other L2 read beats" description="The number of read beats received by a unit that is not specifically identified." units="beats" />
+        <event offset="253" counter="BEATS_WR_LSC_OTHER" title="Core Writes" name="Load/store other write beats" description="The number of write beats by the load/store unit that are due to any reason other than writeback." units="beats" />
+        <event offset="254" counter="BEATS_WR_TIB" title="Core Writes" name="Tile buffer write beats" description="The number of write beats sent by the tile buffer writeback unit." units="beats" />
+        <event offset="255" counter="BEATS_WR_LSC_WB" title="Core Writes" name="Load/store writeback write beats" description="The number of write beats by the load/store unit that are due to writeback." units="beats" />
+    </category>
+    <category name="Tiler" per_cpu="no">
+        <event offset="4" counter="TILER_ACTIVE" title="GPU Cycles" name="Tiler active" description="The number of cycles where the tiler has a workload queued for processing." units="cycles" />
+        <event offset="6" counter="TRIANGLES" title="Input Primitives" name="Triangle primitives" description="The number of input triangle primitives." units="primitives" />
+        <event offset="7" counter="LINES" title="Input Primitives" name="Line primitives" description="The number of input line primitives." units="primitives" />
+        <event offset="8" counter="POINTS" title="Input Primitives" name="Point primitives" description="The number of input point primitives." units="primitives" />
+        <event offset="9" counter="FRONT_FACING" title="Visible Primitives" name="Front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" />
+        <event offset="10" counter="BACK_FACING" title="Visible Primitives" name="Back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" />
+        <event offset="11" counter="PRIM_VISIBLE" title="Primitive Culling" name="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" />
+        <event offset="12" counter="PRIM_CULLED" title="Primitive Culling" name="Facing and XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives" />
+        <event offset="13" counter="PRIM_CLIPPED" title="Primitive Culling" name="Z plane test culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives" />
+        <event offset="14" counter="PRIM_SAT_CULLED" title="Primitive Culling" name="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" />
+        <event offset="17" advanced="yes" counter="BUS_READ" title="Tiler L2 Accesses" name="Read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" />
+        <event offset="19" advanced="yes" counter="BUS_WRITE" title="Tiler L2 Accesses" name="Write beats" description="The number of internal bus data write cycles made by the tiler." units="beats" />
+        <event offset="21" counter="IDVS_POS_SHAD_REQ" title="Tiler Shading Requests" name="Position shading requests" description="The number of position shading requests in the IDVS flow." units="requests" />
+        <event offset="23" advanced="yes" counter="IDVS_POS_SHAD_STALL" title="Tiler Cycles" name="Position shading stall cycles" description="The number of cycles where the tiler has a stalled position shading request." units="cycles" />
+        <event offset="24" advanced="yes" counter="IDVS_POS_FIFO_FULL" title="Tiler Cycles" name="Position FIFO full cycles" description="The number of cycles where the tiler has a stalled position shading buffer." units="cycles" />
+        <event offset="26" advanced="yes" counter="VCACHE_HIT" title="Tiler Vertex Cache" name="Position cache hits" description="The number of position lookups that result in a hit in the vertex cache." units="requests" />
+        <event offset="27" advanced="yes" counter="VCACHE_MISS" title="Tiler Vertex Cache" name="Position cache misses" description="The number of position lookups that miss in the vertex cache." units="requests" />
+        <event offset="31" advanced="yes" counter="VFETCH_STALL" title="Tiler Cycles" name="Primitive assembly busy stall cycles" description="The number of cycles where the tiler is stalled waiting for primitive assembly." units="cycles" />
+        <event offset="34" advanced="yes" counter="IDVS_VBU_HIT" title="Tiler Vertex Cache" name="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" />
+        <event offset="35" advanced="yes" counter="IDVS_VBU_MISS" title="Tiler Vertex Cache" name="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests" />
+        <event offset="37" counter="IDVS_VAR_SHAD_REQ" title="Tiler Shading Requests" name="Varying shading requests" description="The number of varying shading requests in the IDVS flow." units="requests" />
+        <event offset="38" advanced="yes" counter="IDVS_VAR_SHAD_STALL" title="Tiler Cycles" name="Varying shading stall cycles" description="The number of cycles where the tiler has a stalled varying shading request." units="cycles" />
+        <event offset="54" advanced="yes" counter="WRBUF_NO_AXI_ID_STALL" title="Tiler Cycles" name="Write buffer transaction stall cycles" description="The number of cycles where the tiler write buffer can not send data because it has no available write IDs." units="cycles" />
+        <event offset="55" advanced="yes" counter="WRBUF_AXI_STALL" title="Tiler Cycles" name="Write buffer write stall cycles" description="The number of cycles where the tiler write buffer can not send data because the bus is not ready." units="cycles" />
+    </category>
+</metrics>
\ No newline at end of file
diff --git a/src/panfrost/perf/G78.xml b/src/panfrost/perf/G78.xml
new file mode 100644 (file)
index 0000000..fa7d9e9
--- /dev/null
@@ -0,0 +1,179 @@
+<!--
+Copyright © 2017-2020 ARM Limited.
+Copyright © 2021 Collabora, Ltd.
+Author: Antonio Caggiano <antonio.caggiano@collabora.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+<metrics id="TBOx">
+    <category name="Job Manager" per_cpu="no">
+        <event offset="6" counter="GPU_ACTIVE" title="GPU Cycles" name="GPU active" description="The number of cycles where the GPU has a workload of any type queued for processing." units="cycles" />
+        <event offset="7" advanced="yes" counter="IRQ_ACTIVE" title="GPU Cycles" name="Interrupt active" description="The number of cycles where the GPU has a pending interrupt." units="cycles" />
+        <event offset="8" advanced="yes" counter="JS0_JOBS" title="GPU Jobs" name="Fragment jobs" description="The number of jobs processed by the GPU fragment queue." units="jobs" />
+        <event offset="9" counter="JS0_TASKS" title="GPU Tasks" name="Fragment tasks" description="The number of 32x32 pixel tasks processed by the GPU fragment queue." units="tasks" />
+        <event offset="10" counter="JS0_ACTIVE" title="GPU Cycles" name="Fragment queue active" description="The number of cycles where work is queued for processing in the GPU fragment queue." units="cycles" />
+        <event offset="12" advanced="yes" counter="JS0_WAIT_READ" title="GPU Wait Cycles" name="Fragment descriptor read cycles" description="The number of cycles where queued fragment work is waiting for a descriptor load." units="cycles" />
+        <event offset="13" advanced="yes" counter="JS0_WAIT_ISSUE" title="GPU Wait Cycles" name="Fragment job issue cycles" description="The number of cycles where queued fragment work is waiting for an available processor." units="cycles" />
+        <event offset="14" advanced="yes" counter="JS0_WAIT_DEPEND" title="GPU Wait Cycles" name="Fragment job dependency cycles" description="The number of cycles where queued fragment work is waiting for dependent work to complete." units="cycles" />
+        <event offset="15" advanced="yes" counter="JS0_WAIT_FINISH" title="GPU Wait Cycles" name="Fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued fragment work to complete." units="cycles" />
+        <event offset="16" advanced="yes" counter="JS1_JOBS" title="GPU Jobs" name="Non-fragment jobs" description="The number of jobs processed by the GPU non-fragment queue." units="jobs" />
+        <event offset="17" advanced="yes" counter="JS1_TASKS" title="GPU Tasks" name="Non-fragment tasks" description="The number of tasks processed by the GPU non-fragment queue." units="tasks" />
+        <event offset="18" counter="JS1_ACTIVE" title="GPU Cycles" name="Non-fragment queue active" description="The number of cycles where work is queued in the GPU non-fragment queue." units="cycles" />
+        <event offset="20" advanced="yes" counter="JS1_WAIT_READ" title="GPU Wait Cycles" name="Non-fragment descriptor read cycles" description="The number of cycles where queued non-fragment work is waiting for a descriptor load." units="cycles" />
+        <event offset="21" advanced="yes" counter="JS1_WAIT_ISSUE" title="GPU Wait Cycles" name="Non-fragment job issue cycles" description="The number of cycles where queued non-fragment work is waiting for an available processor." units="cycles" />
+        <event offset="22" advanced="yes" counter="JS1_WAIT_DEPEND" title="GPU Wait Cycles" name="Non-fragment job dependency cycles" description="The number of cycles where queued non-fragment work is waiting for dependent work to complete." units="cycles" />
+        <event offset="23" advanced="yes" counter="JS1_WAIT_FINISH" title="GPU Wait Cycles" name="Non-fragment job finish cycles" description="The number of cycles where the GPU is waiting for issued non-fragment work to complete." units="cycles" />
+        <event offset="24" advanced="yes" counter="JS2_JOBS" title="GPU Jobs" name="Reserved jobs" description="The number of jobs processed by the GPU reserved queue." units="jobs" />
+        <event offset="25" advanced="yes" counter="JS2_TASKS" title="GPU Tasks" name="Reserved tasks" description="The number of tasks processed by the GPU reserved queue." units="tasks" />
+        <event offset="26" advanced="yes" counter="JS2_ACTIVE" title="GPU Cycles" name="Reserved queue active" description="The number of cycles where work is queued in the GPU reserved queue." units="cycles" />
+        <event offset="28" advanced="yes" counter="JS2_WAIT_READ" title="GPU Wait Cycles" name="Reserved descriptor read cycles" description="The number of cycles where queued reserved work is waiting for a descriptor load." units="cycles" />
+        <event offset="29" advanced="yes" counter="JS2_WAIT_ISSUE" title="GPU Wait Cycles" name="Reserved job issue cycles" description="The number of cycles where queued reserved work is waiting for an available processor." units="cycles" />
+        <event offset="30" advanced="yes" counter="JS2_WAIT_DEPEND" title="GPU Wait Cycles" name="Reserved job dependency cycles" description="The number of cycles where queued reserved work is waiting for dependent work to complete." units="cycles" />
+        <event offset="31" advanced="yes" counter="JS2_WAIT_FINISH" title="GPU Wait Cycles" name="Reserved job finish cycles" description="The number of cycles where the GPU is waiting for issued reserved work to complete." units="cycles" />
+    </category>
+    <category name="Memory System" per_cpu="no">
+        <event offset="196" advanced="yes" counter="MMU_REQUESTS" title="MMU Stage 1 Translations" name="MMU lookups" description="The number of main MMU address translations performed." units="requests" />
+        <event offset="208" advanced="yes" counter="L2_RD_MSG_IN" title="L2 Cache Requests" name="Read requests" description="The number of L2 cache read requests from internal masters." units="requests" />
+        <event offset="209" advanced="yes" counter="L2_RD_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Read stall cycles" description="The number of cycles where L2 cache read requests from internal masters are stalled." units="cycles" />
+        <event offset="210" advanced="yes" counter="L2_WR_MSG_IN" title="L2 Cache Requests" name="Write requests" description="The number of L2 cache write requests from internal masters." units="requests" />
+        <event offset="211" advanced="yes" counter="L2_WR_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Write stall cycles" description="The number of cycles where L2 cache write requests from internal masters are stalled." units="cycles" />
+        <event offset="212" advanced="yes" counter="L2_SNP_MSG_IN" title="L2 Cache Requests" name="Snoop requests" description="The number of L2 snoop requests from internal masters." units="requests" />
+        <event offset="213" advanced="yes" counter="L2_SNP_MSG_IN_STALL" title="L2 Cache Stall Cycles" name="Snoop stall cycles" description="The number of cycles where L2 cache snoop requests from internal masters are stalled." units="cycles" />
+        <event offset="214" advanced="yes" counter="L2_RD_MSG_OUT" title="L2 Cache Requests" name="L1 read requests" description="The number of L1 cache read requests sent by the L2 cache to an internal master." units="requests" />
+        <event offset="215" advanced="yes" counter="L2_RD_MSG_OUT_STALL" title="L2 Cache Stall Cycles" name="L1 read stall cycles" description="The number of cycles where L1 cache read requests sent by the L2 cache to an internal master are stalled." units="cycles" />
+        <event offset="216" advanced="yes" counter="L2_WR_MSG_OUT" title="L2 Cache Requests" name="L1 write requests" description="The number of L1 cache write responses sent by the L2 cache to an internal master." units="requests" />
+        <event offset="217" counter="L2_ANY_LOOKUP" title="L2 Cache Lookups" name="Any lookup" description="The number of L2 cache lookups performed." units="requests" />
+        <event offset="218" counter="L2_READ_LOOKUP" title="L2 Cache Lookups" name="Read lookup" description="The number of L2 cache read lookups performed." units="requests" />
+        <event offset="219" counter="L2_WRITE_LOOKUP" title="L2 Cache Lookups" name="Write lookup" description="The number of L2 cache write lookups performed." units="requests" />
+        <event offset="220" advanced="yes" counter="L2_EXT_SNOOP_LOOKUP" title="L2 Cache Lookups" name="External snoop lookups" description="The number of coherency snoop lookups performed that were triggered by an external master." units="requests" />
+        <event offset="221" counter="L2_EXT_READ" title="External Bus Accesses" name="Read transaction" description="The number of external read transactions." units="transactions" />
+        <event offset="222" advanced="yes" counter="L2_EXT_READ_NOSNP" title="External Bus Accesses" name="ReadNoSnoop transactions" description="The number of external non-coherent read transactions." units="transactions" />
+        <event offset="223" advanced="yes" counter="L2_EXT_READ_UNIQUE" title="External Bus Accesses" name="ReadUnique transactions" description="The number of external coherent read unique transactions." units="transactions" />
+        <event offset="224" counter="L2_EXT_READ_BEATS" title="External Bus Beats" name="Read beat" description="The number of external bus data read cycles." units="beats" />
+        <event offset="225" counter="L2_EXT_AR_STALL" title="External Bus Stalls" name="Read stall cycles" description="The number of cycles where a read is stalled waiting for the external bus." units="cycles" />
+        <event offset="226" counter="L2_EXT_AR_CNT_Q1" title="External Bus Outstanding Reads" name="0-25% outstanding" description="The number of read transactions initiated when 0-25% of the maximum are in use." units="transactions" />
+        <event offset="227" counter="L2_EXT_AR_CNT_Q2" title="External Bus Outstanding Reads" name="25-50% outstanding" description="The number of read transactions initiated when 25-50% of the maximum are in use." units="transactions" />
+        <event offset="228" counter="L2_EXT_AR_CNT_Q3" title="External Bus Outstanding Reads" name="50-75% outstanding" description="The number of read transactions initiated when 50-75% of the maximum are in use." units="transactions" />
+        <event offset="229" counter="L2_EXT_RRESP_0_127" title="External Bus Read Latency" name="0-127 cycles" description="The number of data beats returned 0-127 cycles after the read request." units="beats" />
+        <event offset="230" counter="L2_EXT_RRESP_128_191" title="External Bus Read Latency" name="128-191 cycles" description="The number of data beats returned 128-191 cycles after the read request." units="beats" />
+        <event offset="231" counter="L2_EXT_RRESP_192_255" title="External Bus Read Latency" name="192-255 cycles" description="The number of data beats returned 192-255 cycles after the read request." units="beats" />
+        <event offset="232" counter="L2_EXT_RRESP_256_319" title="External Bus Read Latency" name="256-319 cycles" description="The number of data beats returned 256-319 cycles after the read request." units="beats" />
+        <event offset="233" counter="L2_EXT_RRESP_320_383" title="External Bus Read Latency" name="320-383 cycles" description="The number of data beats returned 320-383 cycles after the read request." units="beats" />
+        <event offset="234" counter="L2_EXT_WRITE" title="External Bus Accesses" name="Write transaction" description="The number of external write transactions." units="transactions" />
+        <event offset="235" advanced="yes" counter="L2_EXT_WRITE_NOSNP_FULL" title="External Bus Accesses" name="WriteNoSnoopFull transactions" description="The number of external non-coherent full write transactions." units="transactions" />
+        <event offset="236" advanced="yes" counter="L2_EXT_WRITE_NOSNP_PTL" title="External Bus Accesses" name="WriteNoSnoopPartial transactions" description="The number of external non-coherent partial write transactions." units="transactions" />
+        <event offset="237" advanced="yes" counter="L2_EXT_WRITE_SNP_FULL" title="External Bus Accesses" name="WriteSnoopFull transactions" description="The number of external coherent full write transactions." units="transactions" />
+        <event offset="238" advanced="yes" counter="L2_EXT_WRITE_SNP_PTL" title="External Bus Accesses" name="WriteSnoopPartial transactions" description="The number of external coherent partial write transactions." units="transactions" />
+        <event offset="239" counter="L2_EXT_WRITE_BEATS" title="External Bus Beats" name="Write beat" description="The number of external bus data write cycles." units="beats" />
+        <event offset="240" counter="L2_EXT_W_STALL" title="External Bus Stalls" name="Write stall cycles" description="The number of cycles where a write is stalled waiting for the external bus." units="cycles" />
+        <event offset="241" counter="L2_EXT_AW_CNT_Q1" title="External Bus Outstanding Writes" name="0-25% outstanding" description="The number of write transactions initiated when 0-25% of the maximum are in use." units="transactions" />
+        <event offset="242" counter="L2_EXT_AW_CNT_Q2" title="External Bus Outstanding Writes" name="25-50% outstanding" description="The number of write transactions initiated when 25-50% of the maximum are in use." units="transactions" />
+        <event offset="243" counter="L2_EXT_AW_CNT_Q3" title="External Bus Outstanding Writes" name="50-75% outstanding" description="The number of write transactions initiated when 50-75% of the maximum are in use." units="transactions" />
+        <event offset="244" advanced="yes" counter="L2_EXT_SNOOP" title="External Bus Accesses" name="Snoop transactions" description="The number of coherency snoops triggered by external masters." units="transactions" />
+        <event offset="245" advanced="yes" counter="L2_EXT_SNOOP_STALL" title="External Bus Stalls" name="Snoop stall cycles" description="The number of cycles where a coherency snoop triggered by an external master is stalled." units="cycles" />
+    </category>
+    <category name="Shader Core" per_cpu="no">
+        <event offset="196" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment active" description="The number of cycles where the shader core is processing a fragment workload." units="cycles" />
+        <event offset="197" advanced="yes" counter="FRAG_PRIMITIVES_OUT" title="Core Primitives" name="Read primitives" description="The number of primitives read from the tile list by the fragment front-end." units="primitives" />
+        <event offset="198" counter="FRAG_PRIM_RAST" title="Core Primitives" name="Rasterized primitives" description="The number of primitives being rasterized." units="primitives" />
+        <event offset="199" counter="FRAG_FPK_ACTIVE" title="Core Cycles" name="Fragment FPKB active" description="The number of cycles where at least one quad is present in the pre-pipe quad queue." units="cycles" />
+        <event offset="201" counter="FRAG_WARPS" title="Core Warps" name="Fragment warps" description="The number of fragment warps created." units="warps" />
+        <event offset="202" counter="FRAG_PARTIAL_QUADS_RAST" title="Core Quads" name="Partial rasterized quads" description="The number of partially-rasterized fragment quads created." units="quads" />
+        <event offset="203" counter="FRAG_QUADS_RAST" title="Core Quads" name="Rasterized quads" description="The number of quads generated by the rasterization phase." units="quads" />
+        <event offset="204" counter="FRAG_QUADS_EZS_TEST" title="Core Quads" name="Early ZS tested quads" description="The number of quads that are undergoing early depth and stencil testing." units="quads" />
+        <event offset="205" counter="FRAG_QUADS_EZS_UPDATE" title="Core Quads" name="Early ZS updated quads" description="The number of quads undergoing early depth and stencil testing, that are capable of updating the framebuffer." units="quads" />
+        <event offset="206" counter="FRAG_QUADS_EZS_KILL" title="Core Quads" name="Early ZS killed quads" description="The number of quads killed by early depth and stencil testing." units="quads" />
+        <event offset="207" counter="FRAG_LZS_TEST" title="Core Quads" name="Late ZS tested quads" description="The number of quads undergoing late depth and stencil testing." units="quads" />
+        <event offset="208" counter="FRAG_LZS_KILL" title="Core Quads" name="Late ZS killed quads" description="The number of quads killed by late depth and stencil testing." units="quads" />
+        <event offset="209" counter="WARP_REG_SIZE_64" title="Core Warps" name="All register warps" description="The number of warps that require more than 32 registers." units="warps" />
+        <event offset="210" counter="FRAG_PTILES" title="Core Tiles" name="Tiles" description="The number of tiles processed by the shader core." units="tiles" />
+        <event offset="211" counter="FRAG_TRANS_ELIM" title="Core Tiles" name="Constant tiles killed" description="The number of tiles killed by transaction elimination." units="tiles" />
+        <event offset="212" counter="QUAD_FPK_KILLER" title="Core Quads" name="FPK occluder quads" description="The number of quads that are valid occluders for hidden surface removal." units="quads" />
+        <event offset="213" counter="FULL_QUAD_WARPS" title="Core Warps" name="Full quad warps" description="The number of warps that are fully populated with quads." units="warps" />
+        <event offset="214" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Non-fragment active" description="The number of cycles where the shader core is processing some non-fragment workload." units="cycles" />
+        <event offset="215" advanced="yes" counter="COMPUTE_TASKS" title="Core Tasks" name="Non-fragment tasks" description="The number of non-fragment tasks issued to the shader core." units="tasks" />
+        <event offset="216" counter="COMPUTE_WARPS" title="Core Warps" name="Non-fragment warps" description="The number of non-fragment warps created." units="warps" />
+        <event offset="217" advanced="yes" counter="COMPUTE_STARVING" title="Core Starvation Cycles" name="Non-fragment starvation cycles" description="The number of cycles where the shader core is processing a non-fragment workload and there are no new threads available for execution." units="cycles" />
+        <event offset="218" counter="EXEC_CORE_ACTIVE" title="Core Cycles" name="Execution core active" description="The number of cycles where the shader core is processing at least one warp." units="cycles" />
+        <event offset="219" counter="EXEC_INSTR_FMA" title="Core PU Instructions" name="FMA instructions" description="The number of instructions issued to the FMA pipe." units="instructions" />
+        <event offset="220" counter="EXEC_INSTR_CVT" title="Core PU Instructions" name="CVT instructions" description="The number of instructions issued to the CVT pipe." units="instructions" />
+        <event offset="221" counter="EXEC_INSTR_SFU" title="Core PU Instructions" name="SFU instructions" description="The number of instructions issued to the SFU pipe." units="instructions" />
+        <event offset="222" counter="EXEC_INSTR_MSG" title="Core PU Instructions" name="Message instructions" description="The number of instructions issued to the MSG pipe." units="instructions" />
+        <event offset="223" counter="EXEC_INSTR_DIVERGED" title="Core EE Instructions" name="Diverged instructions" description="The number of instructions executed per warp, that have control flow divergence." units="instructions" />
+        <event offset="224" advanced="yes" counter="EXEC_ICACHE_MISS" title="Core PU Instructions" name="Instruction cache misses" description="The number of instruction cache misses." units="requests" />
+        <event offset="225" advanced="yes" counter="EXEC_STARVE_ARITH" title="Core Starvation Cycles" name="Execution engine starvation cycles" description="The number of cycles where the processing unit is starved of work." units="cycles" />
+        <event offset="226" counter="CALL_BLEND_SHADER" title="Core PU Instructions" name="Blend shader calls" description="The number of blend shader invocations executed." units="instructions" />
+        <event offset="227" counter="TEX_MSGI_NUM_FLITS" title="Texture Bus" name="Input beats" description="The number of texture request message data beats." units="beats" />
+        <event offset="228" counter="TEX_DFCH_CLK_STALLED" title="Core Texture Stalls" name="Descriptor stall cycles" description="The number of cycles where a quad is stalled on texture descriptor fetch." units="cycles" />
+        <event offset="229" counter="TEX_TFCH_CLK_STALLED" title="Core Texture Stalls" name="Fetch queue stall cycles" description="The number of cycles where a quad is stalled on entering texture fetch because the fetch queue is full." units="cycles" />
+        <event offset="230" counter="TEX_TFCH_STARVED_PENDING_DATA_FETCH" title="Core Texture Stalls" name="Filtering unit stall cycles" description="The number of cycles where the filtering unit is idle and there is at least one quad present in the texture data fetch queue." units="cycles" />
+        <event offset="231" counter="TEX_FILT_NUM_OPERATIONS" title="Core Texture Cycles" name="Texturing active" description="The number of texture filtering issue cycles." units="cycles" />
+        <event offset="232" counter="TEX_FILT_NUM_FXR_OPERATIONS" title="Core Texture Cycles" name="4x bilinear filtering active" description="The number of cycles where the filtering unit uses the 4x path to implement nearest or bilinear filtering." units="cycles" />
+        <event offset="233" counter="TEX_FILT_NUM_FST_OPERATIONS" title="Core Texture Cycles" name="2x trilinear filtering active" description="The number of cycles where the filtering unit uses the 4x path to implement trilinear filtering." units="cycles" />
+        <event offset="234" counter="TEX_MSGO_NUM_MSG" title="Core Texture Quads" name="Texture requests" description="The number of quad-width texture operations processed by the texture unit." units="quads" />
+        <event offset="235" counter="TEX_MSGO_NUM_FLITS" title="Texture Bus" name="Output beats" description="The number of texture response message data beats." units="beats" />
+        <event offset="236" counter="LS_MEM_READ_FULL" title="Core Load/Store Cycles" name="Full read cycles" description="The number of full-width load/store cache reads." units="cycles" />
+        <event offset="237" counter="LS_MEM_READ_SHORT" title="Core Load/Store Cycles" name="Partial read cycles" description="The number of partial-width load/store cache reads." units="cycles" />
+        <event offset="238" counter="LS_MEM_WRITE_FULL" title="Core Load/Store Cycles" name="Full write cycles" description="The number of full-width load/store cache writes." units="cycles" />
+        <event offset="239" counter="LS_MEM_WRITE_SHORT" title="Core Load/Store Cycles" name="Partial write cycles" description="The number of partial-width load/store cache writes." units="cycles" />
+        <event offset="240" counter="LS_MEM_ATOMIC" title="Core Load/Store Cycles" name="Atomic access cycles" description="The number of load/store atomic accesses." units="cycles" />
+        <event offset="241" counter="VARY_INSTR" title="Core Varying Requests" name="Interpolation requests" description="The number of warp-width interpolation operations processed by the varying unit." units="instructions" />
+        <event offset="242" counter="VARY_SLOT_32" title="Core Varying Cycles" name="32-bit interpolation active" description="The number of 32-bit interpolation cycles processed by the varying unit." units="cycles" />
+        <event offset="243" counter="VARY_SLOT_16" title="Core Varying Cycles" name="16-bit interpolation active" description="The number of 16-bit interpolation cycles processed by the varying unit." units="cycles" />
+        <event offset="244" advanced="yes" counter="ATTR_INSTR" title="Core Attribute Requests" name="Attribute requests" description="The number of instructions executed by the attribute unit." units="instructions" />
+        <event offset="245" counter="SHADER_CORE_ACTIVE" title="Core Cycles" name="Any active" description="The number of cycles where the shader core is processing either a non-fragment workload or a fragment workload." units="cycles" />
+        <event offset="246" counter="BEATS_RD_FTC" title="Core L2 Reads" name="Fragment L2 read beats" description="The number of read beats received by the fixed-function fragment front-end." units="beats" />
+        <event offset="247" counter="BEATS_RD_FTC_EXT" title="Core L2 Reads" name="Fragment external read beats" description="The number of read beats received by the fixed-function fragment front-end that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="248" counter="BEATS_RD_LSC" title="Core L2 Reads" name="Load/store L2 read beats" description="The number of read beats received by the load/store unit." units="beats" />
+        <event offset="249" counter="BEATS_RD_LSC_EXT" title="Core L2 Reads" name="Load/store external read beats" description="The number of read beats received by the load/store unit that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="250" counter="BEATS_RD_TEX" title="Core L2 Reads" name="Texture L2 read beats" description="The number of read beats received by the texture unit." units="beats" />
+        <event offset="251" counter="BEATS_RD_TEX_EXT" title="Core L2 Reads" name="Texture external read beats" description="The number of read beats received by the texture unit that required an external memory access due to an L2 cache miss." units="beats" />
+        <event offset="252" advanced="yes" counter="BEATS_RD_OTHER" title="Core L2 Reads" name="Other L2 read beats" description="The number of read beats received by a unit that is not specifically identified." units="beats" />
+        <event offset="253" counter="BEATS_WR_LSC_OTHER" title="Core Writes" name="Load/store other write beats" description="The number of write beats by the load/store unit that are due to any reason other than writeback." units="beats" />
+        <event offset="254" counter="BEATS_WR_TIB" title="Core Writes" name="Tile buffer write beats" description="The number of write beats sent by the tile buffer writeback unit." units="beats" />
+        <event offset="255" counter="BEATS_WR_LSC_WB" title="Core Writes" name="Load/store writeback write beats" description="The number of write beats by the load/store unit that are due to writeback." units="beats" />
+    </category>
+    <category name="Tiler" per_cpu="no">
+        <event offset="4" counter="TILER_ACTIVE" title="GPU Cycles" name="Tiler active" description="The number of cycles where the tiler has a workload queued for processing." units="cycles" />
+        <event offset="6" counter="TRIANGLES" title="Input Primitives" name="Triangle primitives" description="The number of input triangle primitives." units="primitives" />
+        <event offset="7" counter="LINES" title="Input Primitives" name="Line primitives" description="The number of input line primitives." units="primitives" />
+        <event offset="8" counter="POINTS" title="Input Primitives" name="Point primitives" description="The number of input point primitives." units="primitives" />
+        <event offset="9" counter="FRONT_FACING" title="Visible Primitives" name="Front-facing primitives" description="The number of front-facing triangles that are visible after culling." units="primitives" />
+        <event offset="10" counter="BACK_FACING" title="Visible Primitives" name="Back-facing primitives" description="The number of back-facing triangles that are visible after culling." units="primitives" />
+        <event offset="11" counter="PRIM_VISIBLE" title="Primitive Culling" name="Visible primitives" description="The number of primitives that are visible after culling." units="primitives" />
+        <event offset="12" counter="PRIM_CULLED" title="Primitive Culling" name="Facing and XY plane test culled primitives" description="The number of primitives that are culled by facing or frustum XY plane tests." units="primitives" />
+        <event offset="13" counter="PRIM_CLIPPED" title="Primitive Culling" name="Z plane test culled primitives" description="The number of primitives that are culled by frustum Z plane tests." units="primitives" />
+        <event offset="14" counter="PRIM_SAT_CULLED" title="Primitive Culling" name="Sample test culled primitives" description="The number of primitives culled by the sample coverage test." units="primitives" />
+        <event offset="17" advanced="yes" counter="BUS_READ" title="Tiler L2 Accesses" name="Read beats" description="The number of internal bus data read cycles made by the tiler." units="beats" />
+        <event offset="19" advanced="yes" counter="BUS_WRITE" title="Tiler L2 Accesses" name="Write beats" description="The number of internal bus data write cycles made by the tiler." units="beats" />
+        <event offset="21" counter="IDVS_POS_SHAD_REQ" title="Tiler Shading Requests" name="Position shading requests" description="The number of position shading requests in the IDVS flow." units="requests" />
+        <event offset="23" advanced="yes" counter="IDVS_POS_SHAD_STALL" title="Tiler Cycles" name="Position shading stall cycles" description="The number of cycles where the tiler has a stalled position shading request." units="cycles" />
+        <event offset="24" advanced="yes" counter="IDVS_POS_FIFO_FULL" title="Tiler Cycles" name="Position FIFO full cycles" description="The number of cycles where the tiler has a stalled position shading buffer." units="cycles" />
+        <event offset="26" advanced="yes" counter="VCACHE_HIT" title="Tiler Vertex Cache" name="Position cache hits" description="The number of position lookups that result in a hit in the vertex cache." units="requests" />
+        <event offset="27" advanced="yes" counter="VCACHE_MISS" title="Tiler Vertex Cache" name="Position cache misses" description="The number of position lookups that miss in the vertex cache." units="requests" />
+        <event offset="31" advanced="yes" counter="VFETCH_STALL" title="Tiler Cycles" name="Primitive assembly busy stall cycles" description="The number of cycles where the tiler is stalled waiting for primitive assembly." units="cycles" />
+        <event offset="34" advanced="yes" counter="IDVS_VBU_HIT" title="Tiler Vertex Cache" name="Varying cache hits" description="The number of varying lookups that result in a hit in the vertex cache." units="requests" />
+        <event offset="35" advanced="yes" counter="IDVS_VBU_MISS" title="Tiler Vertex Cache" name="Varying cache misses" description="The number of varying lookups that miss in the vertex cache." units="requests" />
+        <event offset="37" counter="IDVS_VAR_SHAD_REQ" title="Tiler Shading Requests" name="Varying shading requests" description="The number of varying shading requests in the IDVS flow." units="requests" />
+        <event offset="38" advanced="yes" counter="IDVS_VAR_SHAD_STALL" title="Tiler Cycles" name="Varying shading stall cycles" description="The number of cycles where the tiler has a stalled varying shading request." units="cycles" />
+        <event offset="54" advanced="yes" counter="WRBUF_NO_AXI_ID_STALL" title="Tiler Cycles" name="Write buffer transaction stall cycles" description="The number of cycles where the tiler write buffer can not send data because it has no available write IDs." units="cycles" />
+        <event offset="55" advanced="yes" counter="WRBUF_AXI_STALL" title="Tiler Cycles" name="Write buffer write stall cycles" description="The number of cycles where the tiler write buffer can not send data because the bus is not ready." units="cycles" />
+    </category>
+</metrics>
\ No newline at end of file
diff --git a/src/panfrost/perf/T72x.xml b/src/panfrost/perf/T72x.xml
new file mode 100644 (file)
index 0000000..819b43f
--- /dev/null
@@ -0,0 +1,95 @@
+<!--
+Copyright © 2017-2020 ARM Limited.
+Copyright © 2021 Collabora, Ltd.
+Author: Antonio Caggiano <antonio.caggiano@collabora.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+<metrics id="T72x">
+    <category name="Job Manager" per_cpu="no">
+        <event offset="4" counter="GPU_ACTIVE" title="Job Manager Cycles" name="GPU cycles" description="Number of cycles GPU active" units="cycles" />
+        <event offset="5" counter="IRQ_ACTIVE" title="Job Manager Cycles" name="IRQ cycles" description="Number of cycles GPU interrupt pending" units="cycles" />
+        <event offset="8" counter="JS0_ACTIVE" title="Job Manager Cycles" name="JS0 cycles" description="Number of cycles JS0 (fragment) active" units="cycles" />
+        <event offset="11" counter="JS1_ACTIVE" title="Job Manager Cycles" name="JS1 cycles" description="Number of cycles JS1 (vertex/tiler/compute) active" units="cycles" />
+        <event offset="14" counter="JS2_ACTIVE" title="Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (vertex/compute) active" units="cycles" />
+        <event offset="6" counter="JS0_JOBS" title="Job Manager Jobs" name="JS0 jobs" description="Number of Jobs (fragment) completed in JS0" units="jobs" />
+        <event offset="9" counter="JS1_JOBS" title="Job Manager Jobs" name="JS1 jobs" description="Number of Jobs (vertex/tiler/compute) completed in JS1" units="jobs" />
+        <event offset="12" counter="JS2_JOBS" title="Job Manager Jobs" name="JS2 jobs" description="Number of Jobs (vertex/compute) completed in JS2" units="jobs" />
+        <event offset="7" counter="JS0_TASKS" title="Job Manager" name="Pixels" description="Number of Pixels completed in JS0" multiplier="256" units="pixels" />
+        <event offset="10" counter="JS1_TASKS" title="Job Manager Tasks" name="JS1 tasks" description="Number of Tasks completed in JS1" units="tasks" />
+        <event offset="13" counter="JS2_TASKS" title="Job Manager Tasks" name="JS2 tasks" description="Number of Tasks completed in JS2" units="tasks" />
+    </category>
+    <category name="Tiler" per_cpu="no">
+        <event offset="86" counter="TI_ACTIVE" title="Tiler Cycles" name="Tiler cycles" description="Number of cycles Tiler active" units="cycles" />
+        <event offset="70" counter="TI_POLYGONS" title="Tiler Primitives" name="Polygons" description="Number of polygons processed" units="primitives" />
+        <event offset="69" counter="TI_QUADS" title="Tiler Primitives" name="Quads" description="Number of quads processed" units="primitives" />
+        <event offset="68" counter="TI_TRIANGLES" title="Tiler Primitives" name="Triangles" description="Number of triangles processed" units="primitives" />
+        <event offset="72" counter="TI_LINES" title="Tiler Primitives" name="Lines" description="Number of lines processed" units="primitives" />
+        <event offset="71" counter="TI_POINTS" title="Tiler Primitives" name="Points" description="Number of points processed" units="primitives" />
+        <event offset="73" counter="TI_FRONT_FACING" title="Tiler Culling" name="Front facing prims" description="Number of front facing primitives" units="primitives" />
+        <event offset="74" counter="TI_BACK_FACING" title="Tiler Culling" name="Back facing prims" description="Number of back facing primitives" units="primitives" />
+        <event offset="75" counter="TI_PRIM_VISIBLE" title="Tiler Culling" name="Visible prims" description="Number of visible primitives" units="primitives" />
+        <event offset="76" counter="TI_PRIM_CULLED" title="Tiler Culling" name="Culled prims" description="Number of culled primitives" units="primitives" />
+        <event offset="77" counter="TI_PRIM_CLIPPED" title="Tiler Culling" name="Clipped prims" description="Number of clipped primitives" units="primitives" />
+    </category>
+    <category name="Shader Core" per_cpu="no">
+        <event offset="212" counter="TRIPIPE_ACTIVE" title="Core Cycles" name="Tripipe cycles" description="Number of cycles tripipe was active" units="cycles" />
+        <event offset="196" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment cycles" description="Number of cycles fragment processing was active" units="cycles" />
+        <event offset="209" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Compute cycles" description="Number of cycles vertex/compute processing was active" units="cycles" />
+        <event offset="206" counter="FRAG_CYCLES_NO_TILE" title="Core Cycles" name="Fragment cycles waiting for tile" description="Number of cycles spent waiting for a physical tile buffer" units="cycles" />
+        <event offset="199" counter="FRAG_THREADS" title="Fragment Threads" name="Fragment threads" description="Number of fragment threads started" units="threads" />
+        <event offset="200" counter="FRAG_DUMMY_THREADS" title="Fragment Threads" name="Dummy fragment threads" description="Number of dummy fragment threads started" units="threads" />
+        <event offset="204" counter="FRAG_THREADS_LZS_TEST" title="Fragment Threads" name="Fragment threads doing late ZS" description="Number of threads doing late ZS test" units="threads" />
+        <event offset="205" counter="FRAG_THREADS_LZS_KILLED" title="Fragment Threads" name="Fragment threads killed late ZS" description="Number of threads killed by late ZS test" units="threads" />
+        <event offset="210" counter="COMPUTE_TASKS" title="Compute Tasks" name="Compute tasks" description="Number of compute tasks" units="tasks" />
+        <event offset="211" counter="COMPUTE_THREADS" title="Compute Threads" name="Compute threads" description="Number of compute threads started" units="threads" />
+        <event offset="197" counter="FRAG_PRIMITIVES" title="Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler" units="primitives" />
+        <event offset="198" counter="FRAG_PRIMITIVES_DROPPED" title="Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile" units="primitives" />
+        <event offset="201" counter="FRAG_QUADS_RAST" title="Fragment Quads" name="Quads rasterized" description="Number of quads rasterized" units="quads" />
+        <event offset="202" counter="FRAG_QUADS_EZS_TEST" title="Fragment Quads" name="Quads doing early ZS" description="Number of quads doing early ZS test" units="quads" />
+        <event offset="203" counter="FRAG_QUADS_EZS_KILLED" title="Fragment Quads" name="Quads killed early ZS" description="Number of quads killed by early ZS test" units="quads" />
+        <event offset="207" counter="FRAG_NUM_TILES" title="Fragment" name="Pixels" description="Number of pixels rendered" multiplier="256" units="pixels" />
+        <event offset="208" counter="FRAG_TRANS_ELIM" title="Fragment Tasks" name="Tile writes killed by TE" description="Number of tile writes skipped by transaction elimination" units="tiles" />
+        <event offset="213" counter="ARITH_WORDS" title="Arithmetic Pipe" name="A instructions" description="Number of batched instructions executed by the A-pipe" units="instructions" />
+        <event offset="215" counter="LS_WORDS" title="Load/Store Pipe" name="LS instructions" description="Number of instructions completed by the LS-pipe" units="instructions" />
+        <event offset="216" counter="LS_ISSUES" title="Load/Store Pipe" name="LS instruction issues" description="Number of instructions issued to the LS-pipe, including restarts" units="instructions" />
+        <event offset="219" counter="TEX_WORDS" title="Texture Pipe" name="T instructions" description="Number of instructions completed by the T-pipe" units="instructions" />
+        <event offset="221" counter="TEX_ISSUES" title="Texture Pipe" name="T instruction issues" description="Number of threads through loop 2 address calculation" units="instructions" />
+        <event offset="222" counter="LSC_READ_HITS" title="Load/Store Cache Reads" name="Read hits" description="Number of read hits in the Load/Store cache" units="requests" />
+        <event offset="223" counter="LSC_READ_MISSES" title="Load/Store Cache Reads" name="Read misses" description="Number of read misses in the Load/Store cache" units="requests" />
+        <event offset="224" counter="LSC_WRITE_HITS" title="Load/Store Cache Writes" name="Write hits" description="Number of write hits in the Load/Store cache" units="requests" />
+        <event offset="225" counter="LSC_WRITE_MISSES" title="Load/Store Cache Writes" name="Write misses" description="Number of write misses in the Load/Store cache" units="requests" />
+        <event offset="226" counter="LSC_ATOMIC_HITS" title="Load/Store Cache Atomics" name="Atomic hits" description="Number of atomic hits in the Load/Store cache" units="requests" />
+        <event offset="227" counter="LSC_ATOMIC_MISSES" title="Load/Store Cache Atomics" name="Atomic misses" description="Number of atomic misses in the Load/Store cache" units="requests" />
+        <event offset="228" counter="LSC_LINE_FETCHES" title="Load/Store Cache Bus" name="Line fetches" description="Number of line fetches in the Load/Store cache" units="requests" />
+        <event offset="229" counter="LSC_DIRTY_LINE" title="Load/Store Cache Bus" name="Dirty line evictions" description="Number of dirty line evictions in the Load/Store cache" units="requests" />
+        <event offset="230" counter="LSC_SNOOPS" title="Load/Store Cache Bus" name="Snoops in to LSC" description="Number of coherent memory snoops in to the Load/Store cache" units="requests" />
+    </category>
+    <category name="L2 Cache" per_cpu="no">
+        <event offset="134" counter="L2_READ_SNOOP" title="L2 Cache Reads" name="Read snoops" description="Number of read transaction snoops" units="requests" />
+        <event offset="135" counter="L2_READ_HIT" title="L2 Cache Reads" name="L2 read hits" description="Number of reads hitting in the L2 cache" units="requests" />
+        <event offset="136" counter="L2_WRITE_SNOOP" title="L2 Cache Writes" name="Write snoops" description="Number of write transaction snoops" units="requests" />
+        <event offset="137" counter="L2_WRITE_HIT" title="L2 Cache Writes" name="L2 write hits" description="Number of writes hitting in the L2 cache" units="requests" />
+        <event offset="133" counter="L2_EXT_READ_BEAT" title="L2 Cache Ext Reads" name="External read bytes" description="Number of external bus read bytes" multiplier="16" units="bytes" />
+        <event offset="140" counter="L2_EXT_AR_STALL" title="L2 Cache Ext Reads" name="External bus stalls (AR)" description="Number of cycles a valid read address (AR) is stalled by the external interconnect" units="cycles" />
+        <event offset="132" counter="L2_EXT_WRITE_BEAT" title="L2 Cache Ext Writes" name="External write bytes" description="Number of external bus write bytes" multiplier="16" units="bytes" />
+        <event offset="141" counter="L2_EXT_W_STALL" title="L2 Cache Ext Writes" name="External bus stalls (W)" description="Number of cycles a valid write data (W channel) is stalled by the external interconnect" units="cycles" />
+    </category>
+</metrics>
\ No newline at end of file
diff --git a/src/panfrost/perf/T76x.xml b/src/panfrost/perf/T76x.xml
new file mode 100644 (file)
index 0000000..344d18f
--- /dev/null
@@ -0,0 +1,110 @@
+<!--
+Copyright © 2017-2020 ARM Limited.
+Copyright © 2021 Collabora, Ltd.
+Author: Antonio Caggiano <antonio.caggiano@collabora.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+<metrics id="T76x">
+    <category name="Job Manager" per_cpu="no">
+        <event offset="6" counter="GPU_ACTIVE" title="Job Manager Cycles" name="GPU cycles" description="Number of cycles GPU active" units="cycles" />
+        <event offset="7" counter="IRQ_ACTIVE" title="Job Manager Cycles" name="IRQ cycles" description="Number of cycles GPU interrupt pending" units="cycles" />
+        <event offset="10" counter="JS0_ACTIVE" title="Job Manager Cycles" name="JS0 cycles" description="Number of cycles JS0 (fragment) active" units="cycles" />
+        <event offset="18" counter="JS1_ACTIVE" title="Job Manager Cycles" name="JS1 cycles" description="Number of cycles JS1 (vertex/tiler/compute) active" units="cycles" />
+        <event offset="26" counter="JS2_ACTIVE" title="Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (vertex/compute) active" units="cycles" />
+        <event offset="8" counter="JS0_JOBS" title="Job Manager Jobs" name="JS0 jobs" description="Number of Jobs (fragment) completed in JS0" units="jobs" />
+        <event offset="16" counter="JS1_JOBS" title="Job Manager Jobs" name="JS1 jobs" description="Number of Jobs (vertex/tiler/compute) completed in JS1" units="jobs" />
+        <event offset="24" counter="JS2_JOBS" title="Job Manager Jobs" name="JS2 jobs" description="Number of Jobs (vertex/compute) completed in JS2" units="jobs" />
+        <event offset="9" counter="JS0_TASKS" title="Job Manager" name="Pixels" description="Number of Pixels completed in JS0" multiplier="1024" units="pixels" />
+        <event offset="17" counter="JS1_TASKS" title="Job Manager Tasks" name="JS1 tasks" description="Number of Tasks completed in JS1" units="tasks" />
+        <event offset="25" counter="JS2_TASKS" title="Job Manager Tasks" name="JS2 tasks" description="Number of Tasks completed in JS2" units="tasks" />
+    </category>
+    <category name="Tiler" per_cpu="no">
+        <event offset="109" counter="TI_ACTIVE" title="Tiler Cycles" name="Tiler cycles" description="Number of cycles Tiler active" units="cycles" />
+        <event offset="70" counter="TI_POLYGONS" title="Tiler Primitives" name="Polygons" description="Number of polygons processed" units="primitives" />
+        <event offset="69" counter="TI_QUADS" title="Tiler Primitives" name="Quads" description="Number of quads processed" units="primitives" />
+        <event offset="68" counter="TI_TRIANGLES" title="Tiler Primitives" name="Triangles" description="Number of triangles processed" units="primitives" />
+        <event offset="72" counter="TI_LINES" title="Tiler Primitives" name="Lines" description="Number of lines processed" units="primitives" />
+        <event offset="71" counter="TI_POINTS" title="Tiler Primitives" name="Points" description="Number of points processed" units="primitives" />
+        <event offset="75" counter="TI_FRONT_FACING" title="Tiler Culling" name="Front facing prims" description="Number of front facing primitives" units="primitives" />
+        <event offset="76" counter="TI_BACK_FACING" title="Tiler Culling" name="Back facing prims" description="Number of back facing primitives" units="primitives" />
+        <event offset="77" counter="TI_PRIM_VISIBLE" title="Tiler Culling" name="Visible prims" description="Number of visible primitives" units="primitives" />
+        <event offset="78" counter="TI_PRIM_CULLED" title="Tiler Culling" name="Culled prims" description="Number of culled primitives" units="primitives" />
+        <event offset="79" counter="TI_PRIM_CLIPPED" title="Tiler Culling" name="Clipped prims" description="Number of clipped primitives" units="primitives" />
+        <event offset="80" counter="TI_LEVEL0" title="Tiler Hierarchy" name="L0 prims" description="Number of primitives in hierarchy level 0" units="primitives" />
+        <event offset="81" counter="TI_LEVEL1" title="Tiler Hierarchy" name="L1 prims" description="Number of primitives in hierarchy level 1" units="primitives" />
+        <event offset="82" counter="TI_LEVEL2" title="Tiler Hierarchy" name="L2 prims" description="Number of primitives in hierarchy level 2" units="primitives" />
+        <event offset="83" counter="TI_LEVEL3" title="Tiler Hierarchy" name="L3 prims" description="Number of primitives in hierarchy level 3" units="primitives" />
+        <event offset="84" counter="TI_LEVEL4" title="Tiler Hierarchy" name="L4 prims" description="Number of primitives in hierarchy level 4" units="primitives" />
+        <event offset="85" counter="TI_LEVEL5" title="Tiler Hierarchy" name="L5 prims" description="Number of primitives in hierarchy level 5" units="primitives" />
+        <event offset="86" counter="TI_LEVEL6" title="Tiler Hierarchy" name="L6 prims" description="Number of primitives in hierarchy level 6" units="primitives" />
+        <event offset="87" counter="TI_LEVEL7" title="Tiler Hierarchy" name="L7 prims" description="Number of primitives in hierarchy level 7" units="primitives" />
+    </category>
+    <category name="Shader Core" per_cpu="no">
+        <event offset="218" counter="TRIPIPE_ACTIVE" title="Core Cycles" name="Tripipe cycles" description="Number of cycles tripipe was active" units="cycles" />
+        <event offset="196" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment cycles" description="Number of cycles fragment processing was active" units="cycles" />
+        <event offset="214" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Compute cycles" description="Number of cycles vertex/compute processing was active" units="cycles" />
+        <event offset="211" counter="FRAG_CYCLES_NO_TILE" title="Core Cycles" name="Fragment cycles waiting for tile" description="Number of cycles spent waiting for a physical tile buffer" units="cycles" />
+        <event offset="200" counter="FRAG_CYCLES_FPKQ_ACTIVE" title="Core Cycles" name="Fragment cycles pre-pipe buffer not empty" description="Number of cycles the pre-pipe queue contains quads" units="cycles" />
+        <event offset="204" counter="FRAG_THREADS" title="Fragment Threads" name="Fragment threads" description="Number of fragment threads started" units="threads" />
+        <event offset="205" counter="FRAG_DUMMY_THREADS" title="Fragment Threads" name="Dummy fragment threads" description="Number of dummy fragment threads started" units="threads" />
+        <event offset="209" counter="FRAG_THREADS_LZS_TEST" title="Fragment Threads" name="Fragment threads doing late ZS" description="Number of threads doing late ZS test" units="threads" />
+        <event offset="210" counter="FRAG_THREADS_LZS_KILLED" title="Fragment Threads" name="Fragment threads killed late ZS" description="Number of threads killed by late ZS test" units="threads" />
+        <event offset="215" counter="COMPUTE_TASKS" title="Compute Tasks" name="Compute tasks" description="Number of compute tasks" units="tasks" />
+        <event offset="216" counter="COMPUTE_THREADS" title="Compute Threads" name="Compute threads" description="Number of compute threads started" units="threads" />
+        <event offset="197" counter="FRAG_PRIMITIVES" title="Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler" units="primitives" />
+        <event offset="198" counter="FRAG_PRIMITIVES_DROPPED" title="Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile" units="primitives" />
+        <event offset="206" counter="FRAG_QUADS_RAST" title="Fragment Quads" name="Quads rasterized" description="Number of quads rasterized" units="quads" />
+        <event offset="207" counter="FRAG_QUADS_EZS_TEST" title="Fragment Quads" name="Quads doing early ZS" description="Number of quads doing early ZS test" units="quads" />
+        <event offset="208" counter="FRAG_QUADS_EZS_KILLED" title="Fragment Quads" name="Quads killed early ZS" description="Number of quads killed by early ZS test" units="quads" />
+        <event offset="212" counter="FRAG_NUM_TILES" title="Fragment" name="Pixels" description="Number of pixels rendered" multiplier="1024" units="pixels" />
+        <event offset="213" counter="FRAG_TRANS_ELIM" title="Fragment Tasks" name="Tile writes killed by TE" description="Number of tile writes skipped by transaction elimination" units="tiles" />
+        <event offset="219" counter="ARITH_WORDS" title="Arithmetic Pipe" name="A instructions" description="Number of instructions completed by the A-pipe (normalized per pipeline)" units="instructions" />
+        <event offset="223" counter="LS_WORDS" title="Load/Store Pipe" name="LS instructions" description="Number of instructions completed by the LS-pipe" units="instructions" />
+        <event offset="224" counter="LS_ISSUES" title="Load/Store Pipe" name="LS instruction issues" description="Number of instructions issued to the LS-pipe, including restarts" units="instructions" />
+        <event offset="230" counter="TEX_WORDS" title="Texture Pipe" name="T instructions" description="Number of instructions completed by the T-pipe" units="instructions" />
+        <event offset="234" counter="TEX_ISSUES" title="Texture Pipe" name="T instruction issues" description="Number of threads through loop 2 address calculation" units="instructions" />
+        <event offset="235" counter="TEX_RECIRC_FMISS" title="Texture Pipe" name="Cache misses" description="Number of instructions in the T-pipe, recirculated due to cache miss" units="requests" />
+        <event offset="241" counter="LSC_READ_OP" title="Load/Store Cache Reads" name="Read operations" description="Number of read operations in the Load/Store cache" units="requests" />
+        <event offset="240" counter="LSC_READ_HITS" title="Load/Store Cache Reads" name="Read hits" description="Number of read hits in the Load/Store cache" units="requests" />
+        <event offset="243" counter="LSC_WRITE_OP" title="Load/Store Cache Writes" name="Write operations" description="Number of write operations in the Load/Store cache" units="requests" />
+        <event offset="242" counter="LSC_WRITE_HITS" title="Load/Store Cache Writes" name="Write hits" description="Number of write hits in the Load/Store cache" units="requests" />
+        <event offset="245" counter="LSC_ATOMIC_OP" title="Load/Store Cache Atomics" name="Atomic operations" description="Number of atomic operations in the Load/Store cache" units="requests" />
+        <event offset="244" counter="LSC_ATOMIC_HITS" title="Load/Store Cache Atomics" name="Atomic hits" description="Number of atomic hits in the Load/Store cache" units="requests" />
+        <event offset="246" counter="LSC_LINE_FETCHES" title="Load/Store Cache Bus" name="Line fetches" description="Number of line fetches in the Load/Store cache" units="requests" />
+        <event offset="247" counter="LSC_DIRTY_LINE" title="Load/Store Cache Bus" name="Dirty line evictions" description="Number of dirty line evictions in the Load/Store cache" units="requests" />
+        <event offset="248" counter="LSC_SNOOPS" title="Load/Store Cache Bus" name="Snoops in to LSC" description="Number of coherent memory snoops in to the Load/Store cache" units="requests" />
+    </category>
+    <category name="L2 Cache" per_cpu="no">
+        <event offset="161" counter="L2_READ_LOOKUP" title="L2 Cache Reads" name="L2 read lookups" description="Number of reads into the L2 cache" units="requests" />
+        <event offset="164" counter="L2_READ_SNOOP" title="L2 Cache Reads" name="Read snoops" description="Number of read transaction snoops" units="requests" />
+        <event offset="165" counter="L2_READ_HIT" title="L2 Cache Reads" name="L2 read hits" description="Number of reads hitting in the L2 cache" units="requests" />
+        <event offset="170" counter="L2_WRITE_SNOOP" title="L2 Cache Writes" name="Write snoops" description="Number of write transaction snoops" units="requests" />
+        <event offset="171" counter="L2_WRITE_HIT" title="L2 Cache Writes" name="L2 write hits" description="Number of writes hitting in the L2 cache" units="requests" />
+        <event offset="167" counter="L2_WRITE_LOOKUP" title="L2 Cache Writes" name="L2 write lookups" description="Number of writes into the L2 cache" units="requests" />
+        <event offset="159" counter="L2_EXT_READ_BEATS" title="L2 Cache Ext Reads" name="External read bytes" description="Number of external bus read bytes" multiplier="16" units="bytes" />
+        <event offset="182" counter="L2_EXT_AR_STALL" title="L2 Cache Ext Reads" name="External bus stalls (AR)" description="Number of cycles a valid read address (AR) is stalled by the external interconnect" units="cycles" />
+        <event offset="188" counter="L2_EXT_R_BUF_FULL" title="L2 Cache Ext Reads" name="External bus response buffer full" description="Number of cycles a valid request is blocked by a full response buffer" units="cycles" />
+        <event offset="184" counter="L2_EXT_RD_BUF_FULL" title="L2 Cache Ext Reads" name="External bus read data buffer full" description="Number of cycles a valid request is blocked by a full read data buffer" units="cycles" />
+        <event offset="158" counter="L2_EXT_WRITE_BEATS" title="L2 Cache Ext Writes" name="External write bytes" description="Number of external bus write bytes" multiplier="16" units="bytes" />
+        <event offset="186" counter="L2_EXT_W_STALL" title="L2 Cache Ext Writes" name="External bus stalls (W)" description="Number of cycles a valid write data (W channel) is stalled by the external interconnect" units="cycles" />
+        <event offset="187" counter="L2_EXT_W_BUF_FULL" title="L2 Cache Ext Writes" name="External bus write buffer full" description="Number of cycles a valid request is blocked by a full write buffer" units="cycles" />
+    </category>
+</metrics>
\ No newline at end of file
diff --git a/src/panfrost/perf/T82x.xml b/src/panfrost/perf/T82x.xml
new file mode 100644 (file)
index 0000000..abbe1ab
--- /dev/null
@@ -0,0 +1,102 @@
+<!--
+Copyright © 2017-2020 ARM Limited.
+Copyright © 2021 Collabora, Ltd.
+Author: Antonio Caggiano <antonio.caggiano@collabora.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+<metrics id="T82x">
+    <category name="Job Manager" per_cpu="no">
+        <event offset="6" counter="GPU_ACTIVE" title="Job Manager Cycles" name="GPU cycles" description="Number of cycles GPU active" units="cycles" />
+        <event offset="7" counter="IRQ_ACTIVE" title="Job Manager Cycles" name="IRQ cycles" description="Number of cycles GPU interrupt pending" units="cycles" />
+        <event offset="10" counter="JS0_ACTIVE" title="Job Manager Cycles" name="JS0 cycles" description="Number of cycles JS0 (fragment) active" units="cycles" />
+        <event offset="18" counter="JS1_ACTIVE" title="Job Manager Cycles" name="JS1 cycles" description="Number of cycles JS1 (vertex/tiler/compute) active" units="cycles" />
+        <event offset="26" counter="JS2_ACTIVE" title="Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (vertex/compute) active" units="cycles" />
+        <event offset="8" counter="JS0_JOBS" title="Job Manager Jobs" name="JS0 jobs" description="Number of Jobs (fragment) completed in JS0" units="jobs" />
+        <event offset="16" counter="JS1_JOBS" title="Job Manager Jobs" name="JS1 jobs" description="Number of Jobs (vertex/tiler/compute) completed in JS1" units="jobs" />
+        <event offset="24" counter="JS2_JOBS" title="Job Manager Jobs" name="JS2 jobs" description="Number of Jobs (vertex/compute) completed in JS2" units="jobs" />
+        <event offset="9" counter="JS0_TASKS" title="Job Manager" name="Pixels" description="Number of Pixels completed in JS0" multiplier="1024" units="pixels" />
+        <event offset="17" counter="JS1_TASKS" title="Job Manager Tasks" name="JS1 tasks" description="Number of Tasks completed in JS1" units="tasks" />
+        <event offset="25" counter="JS2_TASKS" title="Job Manager Tasks" name="JS2 tasks" description="Number of Tasks completed in JS2" units="tasks" />
+    </category>
+    <category name="Tiler" per_cpu="no"> <!-- Tiler counters: active cycles, primitives processed per type (polygons, quads, triangles, lines, points), and facing/visible/culled/clipped primitive counts. -->
+        <event offset="86" counter="TI_ACTIVE" title="Tiler Cycles" name="Tiler cycles" description="Number of cycles Tiler active" units="cycles" />
+        <event offset="70" counter="TI_POLYGONS" title="Tiler Primitives" name="Polygons" description="Number of polygons processed" units="primitives" />
+        <event offset="69" counter="TI_QUADS" title="Tiler Primitives" name="Quads" description="Number of quads processed" units="primitives" />
+        <event offset="68" counter="TI_TRIANGLES" title="Tiler Primitives" name="Triangles" description="Number of triangles processed" units="primitives" />
+        <event offset="72" counter="TI_LINES" title="Tiler Primitives" name="Lines" description="Number of lines processed" units="primitives" />
+        <event offset="71" counter="TI_POINTS" title="Tiler Primitives" name="Points" description="Number of points processed" units="primitives" />
+        <event offset="73" counter="TI_FRONT_FACING" title="Tiler Culling" name="Front facing prims" description="Number of front facing primitives" units="primitives" />
+        <event offset="74" counter="TI_BACK_FACING" title="Tiler Culling" name="Back facing prims" description="Number of back facing primitives" units="primitives" />
+        <event offset="75" counter="TI_PRIM_VISIBLE" title="Tiler Culling" name="Visible prims" description="Number of visible primitives" units="primitives" />
+        <event offset="76" counter="TI_PRIM_CULLED" title="Tiler Culling" name="Culled prims" description="Number of culled primitives" units="primitives" />
+        <event offset="77" counter="TI_PRIM_CLIPPED" title="Tiler Culling" name="Clipped prims" description="Number of clipped primitives" units="primitives" />
+    </category>
+    <category name="Shader Core" per_cpu="no"> <!-- Shader core counters: tripipe/fragment/compute cycles; fragment thread, quad and primitive counts; compute tasks and threads; arithmetic, load/store and texture pipe instructions; Load/Store cache reads, writes, atomics and bus activity. -->
+        <event offset="218" counter="TRIPIPE_ACTIVE" title="Core Cycles" name="Tripipe cycles" description="Number of cycles tripipe was active" units="cycles" />
+        <event offset="196" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment cycles" description="Number of cycles fragment processing was active" units="cycles" />
+        <event offset="214" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Compute cycles" description="Number of cycles vertex/compute processing was active" units="cycles" />
+        <event offset="211" counter="FRAG_CYCLES_NO_TILE" title="Core Cycles" name="Fragment cycles waiting for tile" description="Number of cycles spent waiting for a physical tile buffer" units="cycles" />
+        <event offset="200" counter="FRAG_CYCLES_FPKQ_ACTIVE" title="Core Cycles" name="Fragment cycles pre-pipe buffer not empty" description="Number of cycles the pre-pipe queue contains quads" units="cycles" />
+        <event offset="204" counter="FRAG_THREADS" title="Fragment Threads" name="Fragment threads" description="Number of fragment threads started" units="threads" />
+        <event offset="205" counter="FRAG_DUMMY_THREADS" title="Fragment Threads" name="Dummy fragment threads" description="Number of dummy fragment threads started" units="threads" />
+        <event offset="209" counter="FRAG_THREADS_LZS_TEST" title="Fragment Threads" name="Fragment threads doing late ZS" description="Number of threads doing late ZS test" units="threads" />
+        <event offset="210" counter="FRAG_THREADS_LZS_KILLED" title="Fragment Threads" name="Fragment threads killed late ZS" description="Number of threads killed by late ZS test" units="threads" />
+        <event offset="215" counter="COMPUTE_TASKS" title="Compute Tasks" name="Compute tasks" description="Number of compute tasks" units="tasks" />
+        <event offset="216" counter="COMPUTE_THREADS" title="Compute Threads" name="Compute threads" description="Number of compute threads started" units="threads" />
+        <event offset="197" counter="FRAG_PRIMITIVES" title="Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler" units="primitives" />
+        <event offset="198" counter="FRAG_PRIMITIVES_DROPPED" title="Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile" units="primitives" />
+        <event offset="206" counter="FRAG_QUADS_RAST" title="Fragment Quads" name="Quads rasterized" description="Number of quads rasterized" units="quads" />
+        <event offset="207" counter="FRAG_QUADS_EZS_TEST" title="Fragment Quads" name="Quads doing early ZS" description="Number of quads doing early ZS test" units="quads" />
+        <event offset="208" counter="FRAG_QUADS_EZS_KILLED" title="Fragment Quads" name="Quads killed early Z" description="Number of quads killed by early ZS test" units="quads" />
+        <event offset="212" counter="FRAG_NUM_TILES" title="Fragment" name="Pixels" description="Number of pixels rendered" multiplier="1024" units="pixels" />
+        <event offset="213" counter="FRAG_TRANS_ELIM" title="Fragment Tasks" name="Tile writes killed by TE" description="Number of tile writes skipped by transaction elimination" units="tiles" />
+        <event offset="219" counter="ARITH_WORDS" title="Arithmetic Pipe" name="A instructions" description="Number of batched instructions executed by the A-pipe (normalized per pipe)" units="instructions" />
+        <event offset="223" counter="LS_WORDS" title="Load/Store Pipe" name="LS instructions" description="Number of instructions completed by the LS-pipe" units="instructions" />
+        <event offset="224" counter="LS_ISSUES" title="Load/Store Pipe" name="LS instruction issues" description="Number of instructions issued to the LS-pipe, including restarts" units="instructions" />
+        <event offset="230" counter="TEX_WORDS" title="Texture Pipe" name="T instructions" description="Number of instructions completed by the T-pipe" units="instructions" />
+        <event offset="234" counter="TEX_ISSUES" title="Texture Pipe" name="T instruction issues" description="Number of threads through loop 2 address calculation" units="instructions" />
+        <event offset="235" counter="TEX_RECIRC_FMISS" title="Texture Pipe" name="Cache misses" description="Number of instructions in the T-pipe, recirculated due to cache miss" units="requests" />
+        <event offset="241" counter="LSC_READ_OP" title="Load/Store Cache Reads" name="Read operations" description="Number of read operations in the Load/Store cache" units="requests" />
+        <event offset="240" counter="LSC_READ_HITS" title="Load/Store Cache Reads" name="Read hits" description="Number of read hits in the Load/Store cache" units="requests" />
+        <event offset="243" counter="LSC_WRITE_OP" title="Load/Store Cache Writes" name="Write operations" description="Number of write operations in the Load/Store cache" units="requests" />
+        <event offset="242" counter="LSC_WRITE_HITS" title="Load/Store Cache Writes" name="Write hits" description="Number of write hits in the Load/Store cache" units="requests" />
+        <event offset="245" counter="LSC_ATOMIC_OP" title="Load/Store Cache Atomics" name="Atomic operations" description="Number of atomic operations in the Load/Store cache" units="requests" />
+        <event offset="244" counter="LSC_ATOMIC_HITS" title="Load/Store Cache Atomics" name="Atomic hits" description="Number of atomic hits in the Load/Store cache" units="requests" />
+        <event offset="246" counter="LSC_LINE_FETCHES" title="Load/Store Cache Bus" name="Line fetches" description="Number of line fetches in the Load/Store cache" units="requests" />
+        <event offset="247" counter="LSC_DIRTY_LINE" title="Load/Store Cache Bus" name="Dirty line evictions" description="Number of dirty line evictions in the Load/Store cache" units="requests" />
+        <event offset="248" counter="LSC_SNOOPS" title="Load/Store Cache Bus" name="Snoops in to LSC" description="Number of coherent memory snoops in to the Load/Store cache" units="requests" />
+    </category>
+    <category name="L2 Cache" per_cpu="no"> <!-- L2 cache counters: read/write lookups, hits and snoops, plus external bus traffic (beat counters carry multiplier 16 and are reported in bytes) and stall/buffer-full cycles. -->
+        <event offset="161" counter="L2_READ_LOOKUP" title="L2 Cache Reads" name="L2 read lookups" description="Number of reads into the L2 cache" units="requests" />
+        <event offset="164" counter="L2_READ_SNOOP" title="L2 Cache Reads" name="Read snoops" description="Number of read transaction snoops" units="requests" />
+        <event offset="165" counter="L2_READ_HIT" title="L2 Cache Reads" name="L2 read hits" description="Number of reads hitting in the L2 cache" units="requests" />
+        <event offset="170" counter="L2_WRITE_SNOOP" title="L2 Cache Writes" name="Write snoops" description="Number of write transaction snoops" units="requests" />
+        <event offset="171" counter="L2_WRITE_HIT" title="L2 Cache Writes" name="L2 write hits" description="Number of writes hitting in the L2 cache" units="requests" />
+        <event offset="167" counter="L2_WRITE_LOOKUP" title="L2 Cache Writes" name="L2 write lookups" description="Number of writes into the L2 cache" units="requests" />
+        <event offset="159" counter="L2_EXT_READ_BEATS" title="L2 Cache Ext Reads" name="External read bytes" description="Number of external bus read bytes" multiplier="16" units="bytes" />
+        <event offset="182" counter="L2_EXT_AR_STALL" title="L2 Cache Ext Reads" name="External bus stalls (AR)" description="Number of cycles a valid read address (AR) is stalled by the external interconnect" units="cycles" />
+        <event offset="188" counter="L2_EXT_R_BUF_FULL" title="L2 Cache Ext Reads" name="External bus response buffer full" description="Number of cycles a valid request is blocked by a full response buffer" units="cycles" />
+        <event offset="184" counter="L2_EXT_RD_BUF_FULL" title="L2 Cache Ext Reads" name="External bus read data buffer full" description="Number of cycles a valid request is blocked by a full read data buffer" units="cycles" />
+        <event offset="158" counter="L2_EXT_WRITE_BEATS" title="L2 Cache Ext Writes" name="External write bytes" description="Number of external bus write bytes" multiplier="16" units="bytes" />
+        <event offset="186" counter="L2_EXT_W_STALL" title="L2 Cache Ext Writes" name="External bus stalls (W)" description="Number of cycles a valid write data (W channel) is stalled by the external interconnect" units="cycles" />
+        <event offset="187" counter="L2_EXT_W_BUF_FULL" title="L2 Cache Ext Writes" name="External bus write buffer full" description="Number of cycles a valid request is blocked by a full write buffer" units="cycles" />
+    </category>
+</metrics>
\ No newline at end of file
diff --git a/src/panfrost/perf/T83x.xml b/src/panfrost/perf/T83x.xml
new file mode 100644 (file)
index 0000000..6834c92
--- /dev/null
@@ -0,0 +1,102 @@
+<!--
+Copyright © 2017-2020 ARM Limited.
+Copyright © 2021 Collabora, Ltd.
+Author: Antonio Caggiano <antonio.caggiano@collabora.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+<metrics id="T83x">
+    <category name="Job Manager" per_cpu="no"> <!-- Job Manager counters: GPU, IRQ and per job slot (JS0 to JS2) active cycles, plus jobs and tasks completed per slot; JS0_TASKS is exposed in pixels with multiplier 1024. -->
+        <event offset="6" counter="GPU_ACTIVE" title="Job Manager Cycles" name="GPU cycles" description="Number of cycles GPU active" units="cycles" />
+        <event offset="7" counter="IRQ_ACTIVE" title="Job Manager Cycles" name="IRQ cycles" description="Number of cycles GPU interrupt pending" units="cycles" />
+        <event offset="10" counter="JS0_ACTIVE" title="Job Manager Cycles" name="JS0 cycles" description="Number of cycles JS0 (fragment) active" units="cycles" />
+        <event offset="18" counter="JS1_ACTIVE" title="Job Manager Cycles" name="JS1 cycles" description="Number of cycles JS1 (vertex/tiler/compute) active" units="cycles" />
+        <event offset="26" counter="JS2_ACTIVE" title="Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (vertex/compute) active" units="cycles" />
+        <event offset="8" counter="JS0_JOBS" title="Job Manager Jobs" name="JS0 jobs" description="Number of Jobs (fragment) completed in JS0" units="jobs" />
+        <event offset="16" counter="JS1_JOBS" title="Job Manager Jobs" name="JS1 jobs" description="Number of Jobs (vertex/tiler/compute) completed in JS1" units="jobs" />
+        <event offset="24" counter="JS2_JOBS" title="Job Manager Jobs" name="JS2 jobs" description="Number of Jobs (vertex/compute) completed in JS2" units="jobs" />
+        <event offset="9" counter="JS0_TASKS" title="Job Manager" name="Pixels" description="Number of Pixels completed in JS0" multiplier="1024" units="pixels" />
+        <event offset="17" counter="JS1_TASKS" title="Job Manager Tasks" name="JS1 tasks" description="Number of Tasks completed in JS1" units="tasks" />
+        <event offset="25" counter="JS2_TASKS" title="Job Manager Tasks" name="JS2 tasks" description="Number of Tasks completed in JS2" units="tasks" />
+    </category>
+    <category name="Tiler" per_cpu="no"> <!-- Tiler counters: active cycles, primitives processed per type (polygons, quads, triangles, lines, points), and facing/visible/culled/clipped primitive counts. -->
+        <event offset="86" counter="TI_ACTIVE" title="Tiler Cycles" name="Tiler cycles" description="Number of cycles Tiler active" units="cycles" />
+        <event offset="70" counter="TI_POLYGONS" title="Tiler Primitives" name="Polygons" description="Number of polygons processed" units="primitives" />
+        <event offset="69" counter="TI_QUADS" title="Tiler Primitives" name="Quads" description="Number of quads processed" units="primitives" />
+        <event offset="68" counter="TI_TRIANGLES" title="Tiler Primitives" name="Triangles" description="Number of triangles processed" units="primitives" />
+        <event offset="72" counter="TI_LINES" title="Tiler Primitives" name="Lines" description="Number of lines processed" units="primitives" />
+        <event offset="71" counter="TI_POINTS" title="Tiler Primitives" name="Points" description="Number of points processed" units="primitives" />
+        <event offset="73" counter="TI_FRONT_FACING" title="Tiler Culling" name="Front facing prims" description="Number of front facing primitives" units="primitives" />
+        <event offset="74" counter="TI_BACK_FACING" title="Tiler Culling" name="Back facing prims" description="Number of back facing primitives" units="primitives" />
+        <event offset="75" counter="TI_PRIM_VISIBLE" title="Tiler Culling" name="Visible prims" description="Number of visible primitives" units="primitives" />
+        <event offset="76" counter="TI_PRIM_CULLED" title="Tiler Culling" name="Culled prims" description="Number of culled primitives" units="primitives" />
+        <event offset="77" counter="TI_PRIM_CLIPPED" title="Tiler Culling" name="Clipped prims" description="Number of clipped primitives" units="primitives" />
+    </category>
+    <category name="Shader Core" per_cpu="no"> <!-- Shader core counters: tripipe/fragment/compute cycles; fragment thread, quad and primitive counts; compute tasks and threads; arithmetic, load/store and texture pipe instructions; Load/Store cache reads, writes, atomics and bus activity. -->
+        <event offset="218" counter="TRIPIPE_ACTIVE" title="Core Cycles" name="Tripipe cycles" description="Number of cycles tripipe was active" units="cycles" />
+        <event offset="196" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment cycles" description="Number of cycles fragment processing was active" units="cycles" />
+        <event offset="214" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Compute cycles" description="Number of cycles vertex/compute processing was active" units="cycles" />
+        <event offset="211" counter="FRAG_CYCLES_NO_TILE" title="Core Cycles" name="Fragment cycles waiting for tile" description="Number of cycles spent waiting for a physical tile buffer" units="cycles" />
+        <event offset="200" counter="FRAG_CYCLES_FPKQ_ACTIVE" title="Core Cycles" name="Fragment cycles pre-pipe buffer not empty" description="Number of cycles the pre-pipe queue contains quads" units="cycles" />
+        <event offset="204" counter="FRAG_THREADS" title="Fragment Threads" name="Fragment threads" description="Number of fragment threads started" units="threads" />
+        <event offset="205" counter="FRAG_DUMMY_THREADS" title="Fragment Threads" name="Dummy fragment threads" description="Number of dummy fragment threads started" units="threads" />
+        <event offset="209" counter="FRAG_THREADS_LZS_TEST" title="Fragment Threads" name="Fragment threads doing late ZS" description="Number of threads doing late ZS test" units="threads" />
+        <event offset="210" counter="FRAG_THREADS_LZS_KILLED" title="Fragment Threads" name="Fragment threads killed late ZS" description="Number of threads killed by late ZS test" units="threads" />
+        <event offset="215" counter="COMPUTE_TASKS" title="Compute Tasks" name="Compute tasks" description="Number of compute tasks" units="tasks" />
+        <event offset="216" counter="COMPUTE_THREADS" title="Compute Threads" name="Compute threads" description="Number of compute threads started" units="threads" />
+        <event offset="197" counter="FRAG_PRIMITIVES" title="Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler" units="primitives" />
+        <event offset="198" counter="FRAG_PRIMITIVES_DROPPED" title="Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile" units="primitives" />
+        <event offset="206" counter="FRAG_QUADS_RAST" title="Fragment Quads" name="Quads rasterized" description="Number of quads rasterized" units="quads" />
+        <event offset="207" counter="FRAG_QUADS_EZS_TEST" title="Fragment Quads" name="Quads doing early ZS" description="Number of quads doing early ZS test" units="quads" />
+        <event offset="208" counter="FRAG_QUADS_EZS_KILLED" title="Fragment Quads" name="Quads killed early Z" description="Number of quads killed by early ZS test" units="quads" />
+        <event offset="212" counter="FRAG_NUM_TILES" title="Fragment" name="Pixels" description="Number of pixels rendered" multiplier="1024" units="pixels" />
+        <event offset="213" counter="FRAG_TRANS_ELIM" title="Fragment Tasks" name="Tile writes killed by TE" description="Number of tile writes skipped by transaction elimination" units="tiles" />
+        <event offset="219" counter="ARITH_WORDS" title="Arithmetic Pipe" name="A instructions" description="Number of batched instructions executed by the A-pipe (normalized per pipe)" units="instructions" />
+        <event offset="223" counter="LS_WORDS" title="Load/Store Pipe" name="LS instructions" description="Number of instructions completed by the LS-pipe" units="instructions" />
+        <event offset="224" counter="LS_ISSUES" title="Load/Store Pipe" name="LS instruction issues" description="Number of instructions issued to the LS-pipe, including restarts" units="instructions" />
+        <event offset="230" counter="TEX_WORDS" title="Texture Pipe" name="T instructions" description="Number of instructions completed by the T-pipe" units="instructions" />
+        <event offset="234" counter="TEX_ISSUES" title="Texture Pipe" name="T instruction issues" description="Number of threads through loop 2 address calculation" units="instructions" />
+        <event offset="235" counter="TEX_RECIRC_FMISS" title="Texture Pipe" name="Cache misses" description="Number of instructions in the T-pipe, recirculated due to cache miss" units="requests" />
+        <event offset="241" counter="LSC_READ_OP" title="Load/Store Cache Reads" name="Read operations" description="Number of read operations in the Load/Store cache" units="requests" />
+        <event offset="240" counter="LSC_READ_HITS" title="Load/Store Cache Reads" name="Read hits" description="Number of read hits in the Load/Store cache" units="requests" />
+        <event offset="243" counter="LSC_WRITE_OP" title="Load/Store Cache Writes" name="Write operations" description="Number of write operations in the Load/Store cache" units="requests" />
+        <event offset="242" counter="LSC_WRITE_HITS" title="Load/Store Cache Writes" name="Write hits" description="Number of write hits in the Load/Store cache" units="requests" />
+        <event offset="245" counter="LSC_ATOMIC_OP" title="Load/Store Cache Atomics" name="Atomic operations" description="Number of atomic operations in the Load/Store cache" units="requests" />
+        <event offset="244" counter="LSC_ATOMIC_HITS" title="Load/Store Cache Atomics" name="Atomic hits" description="Number of atomic hits in the Load/Store cache" units="requests" />
+        <event offset="246" counter="LSC_LINE_FETCHES" title="Load/Store Cache Bus" name="Line fetches" description="Number of line fetches in the Load/Store cache" units="requests" />
+        <event offset="247" counter="LSC_DIRTY_LINE" title="Load/Store Cache Bus" name="Dirty line evictions" description="Number of dirty line evictions in the Load/Store cache" units="requests" />
+        <event offset="248" counter="LSC_SNOOPS" title="Load/Store Cache Bus" name="Snoops in to LSC" description="Number of coherent memory snoops in to the Load/Store cache" units="requests" />
+    </category>
+    <category name="L2 Cache" per_cpu="no"> <!-- L2 cache counters: read/write lookups, hits and snoops, plus external bus traffic (beat counters carry multiplier 16 and are reported in bytes) and stall/buffer-full cycles. -->
+        <event offset="161" counter="L2_READ_LOOKUP" title="L2 Cache Reads" name="L2 read lookups" description="Number of reads into the L2 cache" units="requests" />
+        <event offset="164" counter="L2_READ_SNOOP" title="L2 Cache Reads" name="Read snoops" description="Number of read transaction snoops" units="requests" />
+        <event offset="165" counter="L2_READ_HIT" title="L2 Cache Reads" name="L2 read hits" description="Number of reads hitting in the L2 cache" units="requests" />
+        <event offset="170" counter="L2_WRITE_SNOOP" title="L2 Cache Writes" name="Write snoops" description="Number of write transaction snoops" units="requests" />
+        <event offset="171" counter="L2_WRITE_HIT" title="L2 Cache Writes" name="L2 write hits" description="Number of writes hitting in the L2 cache" units="requests" />
+        <event offset="167" counter="L2_WRITE_LOOKUP" title="L2 Cache Writes" name="L2 write lookups" description="Number of writes into the L2 cache" units="requests" />
+        <event offset="159" counter="L2_EXT_READ_BEATS" title="L2 Cache Ext Reads" name="External read bytes" description="Number of external bus read bytes" multiplier="16" units="bytes" />
+        <event offset="182" counter="L2_EXT_AR_STALL" title="L2 Cache Ext Reads" name="External bus stalls (AR)" description="Number of cycles a valid read address (AR) is stalled by the external interconnect" units="cycles" />
+        <event offset="188" counter="L2_EXT_R_BUF_FULL" title="L2 Cache Ext Reads" name="External bus response buffer full" description="Number of cycles a valid request is blocked by a full response buffer" units="cycles" />
+        <event offset="184" counter="L2_EXT_RD_BUF_FULL" title="L2 Cache Ext Reads" name="External bus read data buffer full" description="Number of cycles a valid request is blocked by a full read data buffer" units="cycles" />
+        <event offset="158" counter="L2_EXT_WRITE_BEATS" title="L2 Cache Ext Writes" name="External write bytes" description="Number of external bus write bytes" multiplier="16" units="bytes" />
+        <event offset="186" counter="L2_EXT_W_STALL" title="L2 Cache Ext Writes" name="External bus stalls (W)" description="Number of cycles a valid write data (W channel) is stalled by the external interconnect" units="cycles" />
+        <event offset="187" counter="L2_EXT_W_BUF_FULL" title="L2 Cache Ext Writes" name="External bus write buffer full" description="Number of cycles a valid request is blocked by a full write buffer" units="cycles" />
+    </category>
+</metrics>
\ No newline at end of file
diff --git a/src/panfrost/perf/T86x.xml b/src/panfrost/perf/T86x.xml
new file mode 100644 (file)
index 0000000..d3a6d18
--- /dev/null
@@ -0,0 +1,110 @@
+<!--
+Copyright © 2017-2020 ARM Limited.
+Copyright © 2021 Collabora, Ltd.
+Author: Antonio Caggiano <antonio.caggiano@collabora.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+<metrics id="T86x">
+    <category name="Job Manager" per_cpu="no"> <!-- Job Manager counters: GPU, IRQ and per job slot (JS0 to JS2) active cycles, plus jobs and tasks completed per slot; JS0_TASKS is exposed in pixels with multiplier 1024. -->
+        <event offset="6" counter="GPU_ACTIVE" title="Job Manager Cycles" name="GPU cycles" description="Number of cycles GPU active" units="cycles" />
+        <event offset="7" counter="IRQ_ACTIVE" title="Job Manager Cycles" name="IRQ cycles" description="Number of cycles GPU interrupt pending" units="cycles" />
+        <event offset="10" counter="JS0_ACTIVE" title="Job Manager Cycles" name="JS0 cycles" description="Number of cycles JS0 (fragment) active" units="cycles" />
+        <event offset="18" counter="JS1_ACTIVE" title="Job Manager Cycles" name="JS1 cycles" description="Number of cycles JS1 (vertex/tiler/compute) active" units="cycles" />
+        <event offset="26" counter="JS2_ACTIVE" title="Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (vertex/compute) active" units="cycles" />
+        <event offset="8" counter="JS0_JOBS" title="Job Manager Jobs" name="JS0 jobs" description="Number of Jobs (fragment) completed in JS0" units="jobs" />
+        <event offset="16" counter="JS1_JOBS" title="Job Manager Jobs" name="JS1 jobs" description="Number of Jobs (vertex/tiler/compute) completed in JS1" units="jobs" />
+        <event offset="24" counter="JS2_JOBS" title="Job Manager Jobs" name="JS2 jobs" description="Number of Jobs (vertex/compute) completed in JS2" units="jobs" />
+        <event offset="9" counter="JS0_TASKS" title="Job Manager" name="Pixels" description="Number of Pixels completed in JS0" multiplier="1024" units="pixels" />
+        <event offset="17" counter="JS1_TASKS" title="Job Manager Tasks" name="JS1 tasks" description="Number of Tasks completed in JS1" units="tasks" />
+        <event offset="25" counter="JS2_TASKS" title="Job Manager Tasks" name="JS2 tasks" description="Number of Tasks completed in JS2" units="tasks" />
+    </category>
+    <category name="Tiler" per_cpu="no"> <!-- Tiler counters: active cycles, primitives processed per type, facing/visible/culled/clipped primitive counts, and primitives per hierarchy level (0 to 7). -->
+        <event offset="109" counter="TI_ACTIVE" title="Tiler Cycles" name="Tiler cycles" description="Number of cycles Tiler active" units="cycles" />
+        <event offset="70" counter="TI_POLYGONS" title="Tiler Primitives" name="Polygons" description="Number of polygons processed" units="primitives" />
+        <event offset="69" counter="TI_QUADS" title="Tiler Primitives" name="Quads" description="Number of quads processed" units="primitives" />
+        <event offset="68" counter="TI_TRIANGLES" title="Tiler Primitives" name="Triangles" description="Number of triangles processed" units="primitives" />
+        <event offset="72" counter="TI_LINES" title="Tiler Primitives" name="Lines" description="Number of lines processed" units="primitives" />
+        <event offset="71" counter="TI_POINTS" title="Tiler Primitives" name="Points" description="Number of points processed" units="primitives" />
+        <event offset="75" counter="TI_FRONT_FACING" title="Tiler Culling" name="Front facing prims" description="Number of front facing primitives" units="primitives" />
+        <event offset="76" counter="TI_BACK_FACING" title="Tiler Culling" name="Back facing prims" description="Number of back facing primitives" units="primitives" />
+        <event offset="77" counter="TI_PRIM_VISIBLE" title="Tiler Culling" name="Visible prims" description="Number of visible primitives" units="primitives" />
+        <event offset="78" counter="TI_PRIM_CULLED" title="Tiler Culling" name="Culled prims" description="Number of culled primitives" units="primitives" />
+        <event offset="79" counter="TI_PRIM_CLIPPED" title="Tiler Culling" name="Clipped prims" description="Number of clipped primitives" units="primitives" />
+        <event offset="80" counter="TI_LEVEL0" title="Tiler Hierarchy" name="L0 prims" description="Number of primitives in hierarchy level 0" units="primitives" />
+        <event offset="81" counter="TI_LEVEL1" title="Tiler Hierarchy" name="L1 prims" description="Number of primitives in hierarchy level 1" units="primitives" />
+        <event offset="82" counter="TI_LEVEL2" title="Tiler Hierarchy" name="L2 prims" description="Number of primitives in hierarchy level 2" units="primitives" />
+        <event offset="83" counter="TI_LEVEL3" title="Tiler Hierarchy" name="L3 prims" description="Number of primitives in hierarchy level 3" units="primitives" />
+        <event offset="84" counter="TI_LEVEL4" title="Tiler Hierarchy" name="L4 prims" description="Number of primitives in hierarchy level 4" units="primitives" />
+        <event offset="85" counter="TI_LEVEL5" title="Tiler Hierarchy" name="L5 prims" description="Number of primitives in hierarchy level 5" units="primitives" />
+        <event offset="86" counter="TI_LEVEL6" title="Tiler Hierarchy" name="L6 prims" description="Number of primitives in hierarchy level 6" units="primitives" />
+        <event offset="87" counter="TI_LEVEL7" title="Tiler Hierarchy" name="L7 prims" description="Number of primitives in hierarchy level 7" units="primitives" />
+    </category>
+    <category name="Shader Core" per_cpu="no">
+        <event offset="218" counter="TRIPIPE_ACTIVE" title="Core Cycles" name="Tripipe cycles" description="Number of cycles tripipe was active" units="cycles" />
+        <event offset="196" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment cycles" description="Number of cycles fragment processing was active" units="cycles" />
+        <event offset="214" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Compute cycles" description="Number of cycles vertex/compute processing was active" units="cycles" />
+        <event offset="211" counter="FRAG_CYCLES_NO_TILE" title="Core Cycles" name="Fragment cycles waiting for tile" description="Number of cycles spent waiting for a physical tile buffer" units="cycles" />
+        <event offset="200" counter="FRAG_CYCLES_FPKQ_ACTIVE" title="Core Cycles" name="Fragment cycles pre-pipe buffer not empty" description="Number of cycles the pre-pipe queue contains quads" units="cycles" />
+        <event offset="204" counter="FRAG_THREADS" title="Fragment Threads" name="Fragment threads" description="Number of fragment threads started" units="threads" />
+        <event offset="205" counter="FRAG_DUMMY_THREADS" title="Fragment Threads" name="Dummy fragment threads" description="Number of dummy fragment threads started" units="threads" />
+        <event offset="209" counter="FRAG_THREADS_LZS_TEST" title="Fragment Threads" name="Fragment threads doing late ZS" description="Number of threads doing late ZS test" units="threads" />
+        <event offset="210" counter="FRAG_THREADS_LZS_KILLED" title="Fragment Threads" name="Fragment threads killed late ZS" description="Number of threads killed by late ZS test" units="threads" />
+        <event offset="215" counter="COMPUTE_TASKS" title="Compute Tasks" name="Compute tasks" description="Number of compute tasks" units="tasks" />
+        <event offset="216" counter="COMPUTE_THREADS" title="Compute Threads" name="Compute threads" description="Number of compute threads started" units="threads" />
+        <event offset="197" counter="FRAG_PRIMITIVES" title="Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler" units="primitives" />
+        <event offset="198" counter="FRAG_PRIMITIVES_DROPPED" title="Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile" units="primitives" />
+        <event offset="206" counter="FRAG_QUADS_RAST" title="Fragment Quads" name="Quads rasterized" description="Number of quads rasterized" units="quads" />
+        <event offset="207" counter="FRAG_QUADS_EZS_TEST" title="Fragment Quads" name="Quads doing early ZS" description="Number of quads doing early ZS test" units="quads" />
+        <event offset="208" counter="FRAG_QUADS_EZS_KILLED" title="Fragment Quads" name="Quads killed early Z" description="Number of quads killed by early ZS test" units="quads" />
+        <event offset="212" counter="FRAG_NUM_TILES" title="Fragment" name="Pixels" description="Number of pixels rendered" multiplier="1024" units="pixels" />
+        <event offset="213" counter="FRAG_TRANS_ELIM" title="Fragment Tasks" name="Tile writes killed by TE" description="Number of tile writes skipped by transaction elimination" units="tiles" />
+        <event offset="219" counter="ARITH_WORDS" title="Arithmetic Pipe" name="A instructions" description="Number of instructions completed by the A-pipe (normalized per pipeline)" units="instructions" />
+        <event offset="223" counter="LS_WORDS" title="Load/Store Pipe" name="LS instructions" description="Number of instructions completed by the LS-pipe" units="instructions" />
+        <event offset="224" counter="LS_ISSUES" title="Load/Store Pipe" name="LS instruction issues" description="Number of instructions issued to the LS-pipe, including restarts" units="instructions" />
+        <event offset="230" counter="TEX_WORDS" title="Texture Pipe" name="T instructions" description="Number of instructions completed by the T-pipe" units="instructions" />
+        <event offset="234" counter="TEX_ISSUES" title="Texture Pipe" name="T instruction issues" description="Number of threads through loop 2 address calculation" units="instructions" />
+        <event offset="235" counter="TEX_RECIRC_FMISS" title="Texture Pipe" name="Cache misses" description="Number of instructions in the T-pipe, recirculated due to cache miss" units="requests" />
+        <event offset="241" counter="LSC_READ_OP" title="Load/Store Cache Reads" name="Read operations" description="Number of read operations in the Load/Store cache" units="requests" />
+        <event offset="240" counter="LSC_READ_HITS" title="Load/Store Cache Reads" name="Read hits" description="Number of read hits in the Load/Store cache" units="requests" />
+        <event offset="243" counter="LSC_WRITE_OP" title="Load/Store Cache Writes" name="Write operations" description="Number of write operations in the Load/Store cache" units="requests" />
+        <event offset="242" counter="LSC_WRITE_HITS" title="Load/Store Cache Writes" name="Write hits" description="Number of write hits in the Load/Store cache" units="requests" />
+        <event offset="245" counter="LSC_ATOMIC_OP" title="Load/Store Cache Atomics" name="Atomic operations" description="Number of atomic operations in the Load/Store cache" units="requests" />
+        <event offset="244" counter="LSC_ATOMIC_HITS" title="Load/Store Cache Atomics" name="Atomic hits" description="Number of atomic hits in the Load/Store cache" units="requests" />
+        <event offset="246" counter="LSC_LINE_FETCHES" title="Load/Store Cache Bus" name="Line fetches" description="Number of line fetches in the Load/Store cache" units="requests" />
+        <event offset="247" counter="LSC_DIRTY_LINE" title="Load/Store Cache Bus" name="Dirty line evictions" description="Number of dirty line evictions in the Load/Store cache" units="requests" />
+        <event offset="248" counter="LSC_SNOOPS" title="Load/Store Cache Bus" name="Snoops in to LSC" description="Number of coherent memory snoops in to the Load/Store cache" units="requests" />
+    </category>
+    <category name="L2 Cache" per_cpu="no"> <!-- L2 cache counters: read/write lookups, hits and snoops, plus external-bus traffic and stall/buffer-full cycles. The two *_BEATS events are presented in bytes via multiplier="16" (presumably 16 bytes per bus beat; confirm against the hardware documentation). -->
+        <event offset="161" counter="L2_READ_LOOKUP" title="L2 Cache Reads" name="L2 read lookups" description="Number of reads into the L2 cache" units="requests" />
+        <event offset="164" counter="L2_READ_SNOOP" title="L2 Cache Reads" name="Read snoops" description="Number of read transaction snoops" units="requests" />
+        <event offset="165" counter="L2_READ_HIT" title="L2 Cache Reads" name="L2 read hits" description="Number of reads hitting in the L2 cache" units="requests" />
+        <event offset="170" counter="L2_WRITE_SNOOP" title="L2 Cache Writes" name="Write snoops" description="Number of write transaction snoops" units="requests" />
+        <event offset="171" counter="L2_WRITE_HIT" title="L2 Cache Writes" name="L2 write hits" description="Number of writes hitting in the L2 cache" units="requests" />
+        <event offset="167" counter="L2_WRITE_LOOKUP" title="L2 Cache Writes" name="L2 write lookups" description="Number of writes into the L2 cache" units="requests" />
+        <event offset="159" counter="L2_EXT_READ_BEATS" title="L2 Cache Ext Reads" name="External read bytes" description="Number of external bus read bytes" multiplier="16" units="bytes" />
+        <event offset="182" counter="L2_EXT_AR_STALL" title="L2 Cache Ext Reads" name="External bus stalls (AR)" description="Number of cycles a valid read address (AR) is stalled by the external interconnect" units="cycles" />
+        <event offset="188" counter="L2_EXT_R_BUF_FULL" title="L2 Cache Ext Reads" name="External bus response buffer full" description="Number of cycles a valid request is blocked by a full response buffer" units="cycles" />
+        <event offset="184" counter="L2_EXT_RD_BUF_FULL" title="L2 Cache Ext Reads" name="External bus read data buffer full" description="Number of cycles a valid request is blocked by a full read data buffer" units="cycles" />
+        <event offset="158" counter="L2_EXT_WRITE_BEATS" title="L2 Cache Ext Writes" name="External write bytes" description="Number of external bus write bytes" multiplier="16" units="bytes" />
+        <event offset="186" counter="L2_EXT_W_STALL" title="L2 Cache Ext Writes" name="External bus stalls (W)" description="Number of cycles a valid write data (W channel) is stalled by the external interconnect" units="cycles" />
+        <event offset="187" counter="L2_EXT_W_BUF_FULL" title="L2 Cache Ext Writes" name="External bus write buffer full" description="Number of cycles a valid request is blocked by a full write buffer" units="cycles" />
+    </category>
+</metrics>
\ No newline at end of file
diff --git a/src/panfrost/perf/T88x.xml b/src/panfrost/perf/T88x.xml
new file mode 100644 (file)
index 0000000..ebbe040
--- /dev/null
@@ -0,0 +1,110 @@
+<!--
+Copyright © 2017-2020 ARM Limited.
+Copyright © 2021 Collabora, Ltd.
+Author: Antonio Caggiano <antonio.caggiano@collabora.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+<metrics id="T88x">
+    <category name="Job Manager" per_cpu="no"> <!-- Job Manager counters: GPU and IRQ active cycles and, for job slots JS0 to JS2, active cycles, completed jobs and completed tasks. JS0_TASKS is presented as "Pixels" via multiplier="1024" (presumably the pixel count covered by one fragment task; confirm against the hardware documentation). -->
+        <event offset="6" counter="GPU_ACTIVE" title="Job Manager Cycles" name="GPU cycles" description="Number of cycles GPU active" units="cycles" />
+        <event offset="7" counter="IRQ_ACTIVE" title="Job Manager Cycles" name="IRQ cycles" description="Number of cycles GPU interrupt pending" units="cycles" />
+        <event offset="10" counter="JS0_ACTIVE" title="Job Manager Cycles" name="JS0 cycles" description="Number of cycles JS0 (fragment) active" units="cycles" />
+        <event offset="18" counter="JS1_ACTIVE" title="Job Manager Cycles" name="JS1 cycles" description="Number of cycles JS1 (vertex/tiler/compute) active" units="cycles" />
+        <event offset="26" counter="JS2_ACTIVE" title="Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (vertex/compute) active" units="cycles" />
+        <event offset="8" counter="JS0_JOBS" title="Job Manager Jobs" name="JS0 jobs" description="Number of Jobs (fragment) completed in JS0" units="jobs" />
+        <event offset="16" counter="JS1_JOBS" title="Job Manager Jobs" name="JS1 jobs" description="Number of Jobs (vertex/tiler/compute) completed in JS1" units="jobs" />
+        <event offset="24" counter="JS2_JOBS" title="Job Manager Jobs" name="JS2 jobs" description="Number of Jobs (vertex/compute) completed in JS2" units="jobs" />
+        <event offset="9" counter="JS0_TASKS" title="Job Manager" name="Pixels" description="Number of Pixels completed in JS0" multiplier="1024" units="pixels" />
+        <event offset="17" counter="JS1_TASKS" title="Job Manager Tasks" name="JS1 tasks" description="Number of Tasks completed in JS1" units="tasks" />
+        <event offset="25" counter="JS2_TASKS" title="Job Manager Tasks" name="JS2 tasks" description="Number of Tasks completed in JS2" units="tasks" />
+    </category>
+    <category name="Tiler" per_cpu="no"> <!-- Tiler counters: active cycles, primitives processed by type (polygons, quads, triangles, lines, points), facing/visibility/culling/clipping results, and primitive counts for hierarchy levels 0 to 7. -->
+        <event offset="109" counter="TI_ACTIVE" title="Tiler Cycles" name="Tiler cycles" description="Number of cycles Tiler active" units="cycles" />
+        <event offset="70" counter="TI_POLYGONS" title="Tiler Primitives" name="Polygons" description="Number of polygons processed" units="primitives" />
+        <event offset="69" counter="TI_QUADS" title="Tiler Primitives" name="Quads" description="Number of quads processed" units="primitives" />
+        <event offset="68" counter="TI_TRIANGLES" title="Tiler Primitives" name="Triangles" description="Number of triangles processed" units="primitives" />
+        <event offset="72" counter="TI_LINES" title="Tiler Primitives" name="Lines" description="Number of lines processed" units="primitives" />
+        <event offset="71" counter="TI_POINTS" title="Tiler Primitives" name="Points" description="Number of points processed" units="primitives" />
+        <event offset="75" counter="TI_FRONT_FACING" title="Tiler Culling" name="Front facing prims" description="Number of front facing primitives" units="primitives" />
+        <event offset="76" counter="TI_BACK_FACING" title="Tiler Culling" name="Back facing prims" description="Number of back facing primitives" units="primitives" />
+        <event offset="77" counter="TI_PRIM_VISIBLE" title="Tiler Culling" name="Visible prims" description="Number of visible primitives" units="primitives" />
+        <event offset="78" counter="TI_PRIM_CULLED" title="Tiler Culling" name="Culled prims" description="Number of culled primitives" units="primitives" />
+        <event offset="79" counter="TI_PRIM_CLIPPED" title="Tiler Culling" name="Clipped prims" description="Number of clipped primitives" units="primitives" />
+        <event offset="80" counter="TI_LEVEL0" title="Tiler Hierarchy" name="L0 prims" description="Number of primitives in hierarchy level 0" units="primitives" />
+        <event offset="81" counter="TI_LEVEL1" title="Tiler Hierarchy" name="L1 prims" description="Number of primitives in hierarchy level 1" units="primitives" />
+        <event offset="82" counter="TI_LEVEL2" title="Tiler Hierarchy" name="L2 prims" description="Number of primitives in hierarchy level 2" units="primitives" />
+        <event offset="83" counter="TI_LEVEL3" title="Tiler Hierarchy" name="L3 prims" description="Number of primitives in hierarchy level 3" units="primitives" />
+        <event offset="84" counter="TI_LEVEL4" title="Tiler Hierarchy" name="L4 prims" description="Number of primitives in hierarchy level 4" units="primitives" />
+        <event offset="85" counter="TI_LEVEL5" title="Tiler Hierarchy" name="L5 prims" description="Number of primitives in hierarchy level 5" units="primitives" />
+        <event offset="86" counter="TI_LEVEL6" title="Tiler Hierarchy" name="L6 prims" description="Number of primitives in hierarchy level 6" units="primitives" />
+        <event offset="87" counter="TI_LEVEL7" title="Tiler Hierarchy" name="L7 prims" description="Number of primitives in hierarchy level 7" units="primitives" />
+    </category>
+    <category name="Shader Core" per_cpu="no"> <!-- Shader Core counters: tripipe/fragment/compute active cycles, fragment threads, primitives and quads, compute tasks and threads, instruction counts for the A, LS and T pipes, and Load/Store cache activity. FRAG_NUM_TILES is presented as "Pixels" via multiplier="1024" (presumably the pixel count per tile task; confirm against the hardware documentation). -->
+        <event offset="218" counter="TRIPIPE_ACTIVE" title="Core Cycles" name="Tripipe cycles" description="Number of cycles tripipe was active" units="cycles" />
+        <event offset="196" counter="FRAG_ACTIVE" title="Core Cycles" name="Fragment cycles" description="Number of cycles fragment processing was active" units="cycles" />
+        <event offset="214" counter="COMPUTE_ACTIVE" title="Core Cycles" name="Compute cycles" description="Number of cycles vertex/compute processing was active" units="cycles" />
+        <event offset="211" counter="FRAG_CYCLES_NO_TILE" title="Core Cycles" name="Fragment cycles waiting for tile" description="Number of cycles spent waiting for a physical tile buffer" units="cycles" />
+        <event offset="200" counter="FRAG_CYCLES_FPKQ_ACTIVE" title="Core Cycles" name="Fragment cycles pre-pipe buffer not empty" description="Number of cycles the pre-pipe queue contains quads" units="cycles" />
+        <event offset="204" counter="FRAG_THREADS" title="Fragment Threads" name="Fragment threads" description="Number of fragment threads started" units="threads" />
+        <event offset="205" counter="FRAG_DUMMY_THREADS" title="Fragment Threads" name="Dummy fragment threads" description="Number of dummy fragment threads started" units="threads" />
+        <event offset="209" counter="FRAG_THREADS_LZS_TEST" title="Fragment Threads" name="Fragment threads doing late ZS" description="Number of threads doing late ZS test" units="threads" />
+        <event offset="210" counter="FRAG_THREADS_LZS_KILLED" title="Fragment Threads" name="Fragment threads killed late ZS" description="Number of threads killed by late ZS test" units="threads" />
+        <event offset="215" counter="COMPUTE_TASKS" title="Compute Tasks" name="Compute tasks" description="Number of compute tasks" units="tasks" />
+        <event offset="216" counter="COMPUTE_THREADS" title="Compute Threads" name="Compute threads" description="Number of compute threads started" units="threads" />
+        <event offset="197" counter="FRAG_PRIMITIVES" title="Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler" units="primitives" />
+        <event offset="198" counter="FRAG_PRIMITIVES_DROPPED" title="Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile" units="primitives" />
+        <event offset="206" counter="FRAG_QUADS_RAST" title="Fragment Quads" name="Quads rasterized" description="Number of quads rasterized" units="quads" />
+        <event offset="207" counter="FRAG_QUADS_EZS_TEST" title="Fragment Quads" name="Quads doing early ZS" description="Number of quads doing early ZS test" units="quads" />
+        <event offset="208" counter="FRAG_QUADS_EZS_KILLED" title="Fragment Quads" name="Quads killed early Z" description="Number of quads killed by early ZS test" units="quads" />
+        <event offset="212" counter="FRAG_NUM_TILES" title="Fragment" name="Pixels" description="Number of pixels rendered" multiplier="1024" units="pixels" />
+        <event offset="213" counter="FRAG_TRANS_ELIM" title="Fragment Tasks" name="Tile writes killed by TE" description="Number of tile writes skipped by transaction elimination" units="tiles" />
+        <event offset="219" counter="ARITH_WORDS" title="Arithmetic Pipe" name="A instructions" description="Number of instructions completed by the A-pipe (normalized per pipeline)" units="instructions" />
+        <event offset="223" counter="LS_WORDS" title="Load/Store Pipe" name="LS instructions" description="Number of instructions completed by the LS-pipe" units="instructions" />
+        <event offset="224" counter="LS_ISSUES" title="Load/Store Pipe" name="LS instruction issues" description="Number of instructions issued to the LS-pipe, including restarts" units="instructions" />
+        <event offset="230" counter="TEX_WORDS" title="Texture Pipe" name="T instructions" description="Number of instructions completed by the T-pipe" units="instructions" />
+        <event offset="234" counter="TEX_ISSUES" title="Texture Pipe" name="T instruction issues" description="Number of threads through loop 2 address calculation" units="instructions" />
+        <event offset="235" counter="TEX_RECIRC_FMISS" title="Texture Pipe" name="Cache misses" description="Number of instructions in the T-pipe, recirculated due to cache miss" units="requests" />
+        <event offset="241" counter="LSC_READ_OP" title="Load/Store Cache Reads" name="Read operations" description="Number of read operations in the Load/Store cache" units="requests" />
+        <event offset="240" counter="LSC_READ_HITS" title="Load/Store Cache Reads" name="Read hits" description="Number of read hits in the Load/Store cache" units="requests" />
+        <event offset="243" counter="LSC_WRITE_OP" title="Load/Store Cache Writes" name="Write operations" description="Number of write operations in the Load/Store cache" units="requests" />
+        <event offset="242" counter="LSC_WRITE_HITS" title="Load/Store Cache Writes" name="Write hits" description="Number of write hits in the Load/Store cache" units="requests" />
+        <event offset="245" counter="LSC_ATOMIC_OP" title="Load/Store Cache Atomics" name="Atomic operations" description="Number of atomic operations in the Load/Store cache" units="requests" />
+        <event offset="244" counter="LSC_ATOMIC_HITS" title="Load/Store Cache Atomics" name="Atomic hits" description="Number of atomic hits in the Load/Store cache" units="requests" />
+        <event offset="246" counter="LSC_LINE_FETCHES" title="Load/Store Cache Bus" name="Line fetches" description="Number of line fetches in the Load/Store cache" units="requests" />
+        <event offset="247" counter="LSC_DIRTY_LINE" title="Load/Store Cache Bus" name="Dirty line evictions" description="Number of dirty line evictions in the Load/Store cache" units="requests" />
+        <event offset="248" counter="LSC_SNOOPS" title="Load/Store Cache Bus" name="Snoops in to LSC" description="Number of coherent memory snoops in to the Load/Store cache" units="requests" />
+    </category>
+    <category name="L2 Cache" per_cpu="no"> <!-- L2 cache counters: read/write lookups, hits and snoops, plus external-bus traffic and stall/buffer-full cycles. The two *_BEATS events are presented in bytes via multiplier="16" (presumably 16 bytes per bus beat; confirm against the hardware documentation). -->
+        <event offset="161" counter="L2_READ_LOOKUP" title="L2 Cache Reads" name="L2 read lookups" description="Number of reads into the L2 cache" units="requests" />
+        <event offset="164" counter="L2_READ_SNOOP" title="L2 Cache Reads" name="Read snoops" description="Number of read transaction snoops" units="requests" />
+        <event offset="165" counter="L2_READ_HIT" title="L2 Cache Reads" name="L2 read hits" description="Number of reads hitting in the L2 cache" units="requests" />
+        <event offset="170" counter="L2_WRITE_SNOOP" title="L2 Cache Writes" name="Write snoops" description="Number of write transaction snoops" units="requests" />
+        <event offset="171" counter="L2_WRITE_HIT" title="L2 Cache Writes" name="L2 write hits" description="Number of writes hitting in the L2 cache" units="requests" />
+        <event offset="167" counter="L2_WRITE_LOOKUP" title="L2 Cache Writes" name="L2 write lookups" description="Number of writes into the L2 cache" units="requests" />
+        <event offset="159" counter="L2_EXT_READ_BEATS" title="L2 Cache Ext Reads" name="External read bytes" description="Number of external bus read bytes" multiplier="16" units="bytes" />
+        <event offset="182" counter="L2_EXT_AR_STALL" title="L2 Cache Ext Reads" name="External bus stalls (AR)" description="Number of cycles a valid read address (AR) is stalled by the external interconnect" units="cycles" />
+        <event offset="188" counter="L2_EXT_R_BUF_FULL" title="L2 Cache Ext Reads" name="External bus response buffer full" description="Number of cycles a valid request is blocked by a full response buffer" units="cycles" />
+        <event offset="184" counter="L2_EXT_RD_BUF_FULL" title="L2 Cache Ext Reads" name="External bus read data buffer full" description="Number of cycles a valid request is blocked by a full read data buffer" units="cycles" />
+        <event offset="158" counter="L2_EXT_WRITE_BEATS" title="L2 Cache Ext Writes" name="External write bytes" description="Number of external bus write bytes" multiplier="16" units="bytes" />
+        <event offset="186" counter="L2_EXT_W_STALL" title="L2 Cache Ext Writes" name="External bus stalls (W)" description="Number of cycles a valid write data (W channel) is stalled by the external interconnect" units="cycles" />
+        <event offset="187" counter="L2_EXT_W_BUF_FULL" title="L2 Cache Ext Writes" name="External bus write buffer full" description="Number of cycles a valid request is blocked by a full write buffer" units="cycles" />
+    </category>
+</metrics>
\ No newline at end of file