freedreno/perfcntrs: Re-indent
author Rob Clark <robdclark@chromium.org>
Fri, 16 Apr 2021 18:59:30 +0000 (11:59 -0700)
committer Marge Bot <eric+marge@anholt.net>
Sat, 17 Apr 2021 15:38:56 +0000 (15:38 +0000)
Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10293>

src/freedreno/perfcntrs/fd2_perfcntr.c
src/freedreno/perfcntrs/fd5_perfcntr.c
src/freedreno/perfcntrs/fd6_perfcntr.c
src/freedreno/perfcntrs/fdperf.c
src/freedreno/perfcntrs/freedreno_dt.c
src/freedreno/perfcntrs/freedreno_dt.h
src/freedreno/perfcntrs/freedreno_perfcntr.c
src/freedreno/perfcntrs/freedreno_perfcntr.h

index af9a514..f0affe6 100644 (file)
 #include "freedreno_perfcntr.h"
 
 static const struct fd_perfcntr_countable pa_su_countables[] = {
-       COUNTABLE(PERF_PAPC_PASX_REQ, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_PASX_FIRST_VECTOR, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_PASX_SECOND_VECTOR, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_PASX_FIRST_DEAD, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_PASX_SECOND_DEAD, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_PASX_VTX_KILL_DISCARD, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_PASX_VTX_NAN_DISCARD, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_PA_INPUT_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_PA_INPUT_NULL_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_PA_INPUT_EVENT_FLAG, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_PA_INPUT_FIRST_PRIM_SLOT, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_PA_INPUT_END_OF_PACKET, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPR_CULL_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPR_VV_CULL_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPR_VTX_KILL_CULL_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPR_VTX_NAN_CULL_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPR_CULL_TO_NULL_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPR_VV_CLIP_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPR_POINT_CLIP_CANDIDATE, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_1, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_2, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_3, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_4, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_5, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_6, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_NEAR, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_FAR, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_LEFT, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_RIGHT, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_TOP, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_BOTTOM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLSM_NULL_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLSM_TOTALLY_VISIBLE_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLSM_CLIP_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLSM_CULL_TO_NULL_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_1, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_2, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_3, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_4, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_5, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_6_7, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLSM_NON_TRIVIAL_CULL, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_INPUT_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_INPUT_CLIP_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_INPUT_NULL_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_ZERO_AREA_CULL_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_BACK_FACE_CULL_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_FRONT_FACE_CULL_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_POLYMODE_FACE_CULL, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_POLYMODE_BACK_CULL, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_POLYMODE_FRONT_CULL, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_POLYMODE_INVALID_FILL, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_OUTPUT_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_OUTPUT_CLIP_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_OUTPUT_NULL_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_OUTPUT_EVENT_FLAG, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_OUTPUT_FIRST_PRIM_SLOT, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_OUTPUT_END_OF_PACKET, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_OUTPUT_POLYMODE_FACE, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_OUTPUT_POLYMODE_BACK, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_OUTPUT_POLYMODE_FRONT, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_OUT_CLIP_POLYMODE_FACE, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_OUT_CLIP_POLYMODE_BACK, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_OUT_CLIP_POLYMODE_FRONT, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_PASX_REQ_IDLE, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_PASX_REQ_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_PASX_REQ_STALLED, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_PASX_REC_IDLE, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_PASX_REC_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_PASX_REC_STARVED_SX, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_PASX_REC_STALLED, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_PASX_REC_STALLED_POS_MEM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_PASX_REC_STALLED_CCGSM_IN, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CCGSM_IDLE, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CCGSM_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CCGSM_STALLED, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPRIM_IDLE, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPRIM_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPRIM_STALLED, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLPRIM_STARVED_CCGSM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLIPSM_IDLE, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLIPSM_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLIPSM_WAIT_CLIP_VERT_ENGH, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLIPSM_WAIT_HIGH_PRI_SEQ, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLIPSM_WAIT_CLIPGA, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLIPSM_WAIT_AVAIL_VTE_CLIP, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLIPSM_WAIT_CLIP_OUTSM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLIPGA_IDLE, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLIPGA_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLIPGA_STARVED_VTE_CLIP, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLIPGA_STALLED, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLIP_IDLE, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_CLIP_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_IDLE, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_STARVED_CLIP, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_STALLED_SC, UINT64, AVERAGE),
-       COUNTABLE(PERF_PAPC_SU_FACENESS_CULL, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_PASX_REQ, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_PASX_FIRST_VECTOR, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_PASX_SECOND_VECTOR, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_PASX_FIRST_DEAD, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_PASX_SECOND_DEAD, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_PASX_VTX_KILL_DISCARD, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_PASX_VTX_NAN_DISCARD, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_PA_INPUT_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_PA_INPUT_NULL_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_PA_INPUT_EVENT_FLAG, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_PA_INPUT_FIRST_PRIM_SLOT, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_PA_INPUT_END_OF_PACKET, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPR_CULL_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPR_VV_CULL_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPR_VTX_KILL_CULL_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPR_VTX_NAN_CULL_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPR_CULL_TO_NULL_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPR_VV_CLIP_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPR_POINT_CLIP_CANDIDATE, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_1, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_2, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_3, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_4, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_5, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_6, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_NEAR, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_FAR, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_LEFT, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_RIGHT, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_TOP, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_BOTTOM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLSM_NULL_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLSM_TOTALLY_VISIBLE_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLSM_CLIP_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLSM_CULL_TO_NULL_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_1, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_2, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_3, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_4, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_5, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_6_7, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLSM_NON_TRIVIAL_CULL, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_INPUT_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_INPUT_CLIP_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_INPUT_NULL_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_ZERO_AREA_CULL_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_BACK_FACE_CULL_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_FRONT_FACE_CULL_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_POLYMODE_FACE_CULL, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_POLYMODE_BACK_CULL, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_POLYMODE_FRONT_CULL, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_POLYMODE_INVALID_FILL, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_OUTPUT_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_OUTPUT_CLIP_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_OUTPUT_NULL_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_OUTPUT_EVENT_FLAG, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_OUTPUT_FIRST_PRIM_SLOT, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_OUTPUT_END_OF_PACKET, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_OUTPUT_POLYMODE_FACE, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_OUTPUT_POLYMODE_BACK, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_OUTPUT_POLYMODE_FRONT, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_OUT_CLIP_POLYMODE_FACE, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_OUT_CLIP_POLYMODE_BACK, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_OUT_CLIP_POLYMODE_FRONT, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_PASX_REQ_IDLE, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_PASX_REQ_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_PASX_REQ_STALLED, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_PASX_REC_IDLE, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_PASX_REC_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_PASX_REC_STARVED_SX, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_PASX_REC_STALLED, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_PASX_REC_STALLED_POS_MEM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_PASX_REC_STALLED_CCGSM_IN, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CCGSM_IDLE, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CCGSM_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CCGSM_STALLED, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPRIM_IDLE, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPRIM_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPRIM_STALLED, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLPRIM_STARVED_CCGSM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLIPSM_IDLE, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLIPSM_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLIPSM_WAIT_CLIP_VERT_ENGH, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLIPSM_WAIT_HIGH_PRI_SEQ, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLIPSM_WAIT_CLIPGA, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLIPSM_WAIT_AVAIL_VTE_CLIP, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLIPSM_WAIT_CLIP_OUTSM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLIPGA_IDLE, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLIPGA_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLIPGA_STARVED_VTE_CLIP, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLIPGA_STALLED, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLIP_IDLE, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_CLIP_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_IDLE, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_STARVED_CLIP, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_STALLED_SC, UINT64, AVERAGE),
+      COUNTABLE(PERF_PAPC_SU_FACENESS_CULL, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_countable pa_sc_countables[] = {
-       COUNTABLE(SC_SR_WINDOW_VALID, UINT64, AVERAGE),
-       COUNTABLE(SC_CW_WINDOW_VALID, UINT64, AVERAGE),
-       COUNTABLE(SC_QM_WINDOW_VALID, UINT64, AVERAGE),
-       COUNTABLE(SC_FW_WINDOW_VALID, UINT64, AVERAGE),
-       COUNTABLE(SC_EZ_WINDOW_VALID, UINT64, AVERAGE),
-       COUNTABLE(SC_IT_WINDOW_VALID, UINT64, AVERAGE),
-       COUNTABLE(SC_STARVED_BY_PA, UINT64, AVERAGE),
-       COUNTABLE(SC_STALLED_BY_RB_TILE, UINT64, AVERAGE),
-       COUNTABLE(SC_STALLED_BY_RB_SAMP, UINT64, AVERAGE),
-       COUNTABLE(SC_STARVED_BY_RB_EZ, UINT64, AVERAGE),
-       COUNTABLE(SC_STALLED_BY_SAMPLE_FF, UINT64, AVERAGE),
-       COUNTABLE(SC_STALLED_BY_SQ, UINT64, AVERAGE),
-       COUNTABLE(SC_STALLED_BY_SP, UINT64, AVERAGE),
-       COUNTABLE(SC_TOTAL_NO_PRIMS, UINT64, AVERAGE),
-       COUNTABLE(SC_NON_EMPTY_PRIMS, UINT64, AVERAGE),
-       COUNTABLE(SC_NO_TILES_PASSING_QM, UINT64, AVERAGE),
-       COUNTABLE(SC_NO_PIXELS_PRE_EZ, UINT64, AVERAGE),
-       COUNTABLE(SC_NO_PIXELS_POST_EZ, UINT64, AVERAGE),
+      COUNTABLE(SC_SR_WINDOW_VALID, UINT64, AVERAGE),
+      COUNTABLE(SC_CW_WINDOW_VALID, UINT64, AVERAGE),
+      COUNTABLE(SC_QM_WINDOW_VALID, UINT64, AVERAGE),
+      COUNTABLE(SC_FW_WINDOW_VALID, UINT64, AVERAGE),
+      COUNTABLE(SC_EZ_WINDOW_VALID, UINT64, AVERAGE),
+      COUNTABLE(SC_IT_WINDOW_VALID, UINT64, AVERAGE),
+      COUNTABLE(SC_STARVED_BY_PA, UINT64, AVERAGE),
+      COUNTABLE(SC_STALLED_BY_RB_TILE, UINT64, AVERAGE),
+      COUNTABLE(SC_STALLED_BY_RB_SAMP, UINT64, AVERAGE),
+      COUNTABLE(SC_STARVED_BY_RB_EZ, UINT64, AVERAGE),
+      COUNTABLE(SC_STALLED_BY_SAMPLE_FF, UINT64, AVERAGE),
+      COUNTABLE(SC_STALLED_BY_SQ, UINT64, AVERAGE),
+      COUNTABLE(SC_STALLED_BY_SP, UINT64, AVERAGE),
+      COUNTABLE(SC_TOTAL_NO_PRIMS, UINT64, AVERAGE),
+      COUNTABLE(SC_NON_EMPTY_PRIMS, UINT64, AVERAGE),
+      COUNTABLE(SC_NO_TILES_PASSING_QM, UINT64, AVERAGE),
+      COUNTABLE(SC_NO_PIXELS_PRE_EZ, UINT64, AVERAGE),
+      COUNTABLE(SC_NO_PIXELS_POST_EZ, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_countable vgt_countables[] = {
-       COUNTABLE(VGT_SQ_EVENT_WINDOW_ACTIVE, UINT64, AVERAGE),
-       COUNTABLE(VGT_SQ_SEND, UINT64, AVERAGE),
-       COUNTABLE(VGT_SQ_STALLED, UINT64, AVERAGE),
-       COUNTABLE(VGT_SQ_STARVED_BUSY, UINT64, AVERAGE),
-       COUNTABLE(VGT_SQ_STARVED_IDLE, UINT64, AVERAGE),
-       COUNTABLE(VGT_SQ_STATIC, UINT64, AVERAGE),
-       COUNTABLE(VGT_PA_EVENT_WINDOW_ACTIVE, UINT64, AVERAGE),
-       COUNTABLE(VGT_PA_CLIP_V_SEND, UINT64, AVERAGE),
-       COUNTABLE(VGT_PA_CLIP_V_STALLED, UINT64, AVERAGE),
-       COUNTABLE(VGT_PA_CLIP_V_STARVED_BUSY, UINT64, AVERAGE),
-       COUNTABLE(VGT_PA_CLIP_V_STARVED_IDLE, UINT64, AVERAGE),
-       COUNTABLE(VGT_PA_CLIP_V_STATIC, UINT64, AVERAGE),
-       COUNTABLE(VGT_PA_CLIP_P_SEND, UINT64, AVERAGE),
-       COUNTABLE(VGT_PA_CLIP_P_STALLED, UINT64, AVERAGE),
-       COUNTABLE(VGT_PA_CLIP_P_STARVED_BUSY, UINT64, AVERAGE),
-       COUNTABLE(VGT_PA_CLIP_P_STARVED_IDLE, UINT64, AVERAGE),
-       COUNTABLE(VGT_PA_CLIP_P_STATIC, UINT64, AVERAGE),
-       COUNTABLE(VGT_PA_CLIP_S_SEND, UINT64, AVERAGE),
-       COUNTABLE(VGT_PA_CLIP_S_STALLED, UINT64, AVERAGE),
-       COUNTABLE(VGT_PA_CLIP_S_STARVED_BUSY, UINT64, AVERAGE),
-       COUNTABLE(VGT_PA_CLIP_S_STARVED_IDLE, UINT64, AVERAGE),
-       COUNTABLE(VGT_PA_CLIP_S_STATIC, UINT64, AVERAGE),
-       COUNTABLE(RBIU_FIFOS_EVENT_WINDOW_ACTIVE, UINT64, AVERAGE),
-       COUNTABLE(RBIU_IMMED_DATA_FIFO_STARVED, UINT64, AVERAGE),
-       COUNTABLE(RBIU_IMMED_DATA_FIFO_STALLED, UINT64, AVERAGE),
-       COUNTABLE(RBIU_DMA_REQUEST_FIFO_STARVED, UINT64, AVERAGE),
-       COUNTABLE(RBIU_DMA_REQUEST_FIFO_STALLED, UINT64, AVERAGE),
-       COUNTABLE(RBIU_DRAW_INITIATOR_FIFO_STARVED, UINT64, AVERAGE),
-       COUNTABLE(RBIU_DRAW_INITIATOR_FIFO_STALLED, UINT64, AVERAGE),
-       COUNTABLE(BIN_PRIM_NEAR_CULL, UINT64, AVERAGE),
-       COUNTABLE(BIN_PRIM_ZERO_CULL, UINT64, AVERAGE),
-       COUNTABLE(BIN_PRIM_FAR_CULL, UINT64, AVERAGE),
-       COUNTABLE(BIN_PRIM_BIN_CULL, UINT64, AVERAGE),
-       COUNTABLE(BIN_PRIM_FACE_CULL, UINT64, AVERAGE),
-       COUNTABLE(SPARE34, UINT64, AVERAGE),
-       COUNTABLE(SPARE35, UINT64, AVERAGE),
-       COUNTABLE(SPARE36, UINT64, AVERAGE),
-       COUNTABLE(SPARE37, UINT64, AVERAGE),
-       COUNTABLE(SPARE38, UINT64, AVERAGE),
-       COUNTABLE(SPARE39, UINT64, AVERAGE),
-       COUNTABLE(TE_SU_IN_VALID, UINT64, AVERAGE),
-       COUNTABLE(TE_SU_IN_READ, UINT64, AVERAGE),
-       COUNTABLE(TE_SU_IN_PRIM, UINT64, AVERAGE),
-       COUNTABLE(TE_SU_IN_EOP, UINT64, AVERAGE),
-       COUNTABLE(TE_SU_IN_NULL_PRIM, UINT64, AVERAGE),
-       COUNTABLE(TE_WK_IN_VALID, UINT64, AVERAGE),
-       COUNTABLE(TE_WK_IN_READ, UINT64, AVERAGE),
-       COUNTABLE(TE_OUT_PRIM_VALID, UINT64, AVERAGE),
-       COUNTABLE(TE_OUT_PRIM_READ, UINT64, AVERAGE),
+      COUNTABLE(VGT_SQ_EVENT_WINDOW_ACTIVE, UINT64, AVERAGE),
+      COUNTABLE(VGT_SQ_SEND, UINT64, AVERAGE),
+      COUNTABLE(VGT_SQ_STALLED, UINT64, AVERAGE),
+      COUNTABLE(VGT_SQ_STARVED_BUSY, UINT64, AVERAGE),
+      COUNTABLE(VGT_SQ_STARVED_IDLE, UINT64, AVERAGE),
+      COUNTABLE(VGT_SQ_STATIC, UINT64, AVERAGE),
+      COUNTABLE(VGT_PA_EVENT_WINDOW_ACTIVE, UINT64, AVERAGE),
+      COUNTABLE(VGT_PA_CLIP_V_SEND, UINT64, AVERAGE),
+      COUNTABLE(VGT_PA_CLIP_V_STALLED, UINT64, AVERAGE),
+      COUNTABLE(VGT_PA_CLIP_V_STARVED_BUSY, UINT64, AVERAGE),
+      COUNTABLE(VGT_PA_CLIP_V_STARVED_IDLE, UINT64, AVERAGE),
+      COUNTABLE(VGT_PA_CLIP_V_STATIC, UINT64, AVERAGE),
+      COUNTABLE(VGT_PA_CLIP_P_SEND, UINT64, AVERAGE),
+      COUNTABLE(VGT_PA_CLIP_P_STALLED, UINT64, AVERAGE),
+      COUNTABLE(VGT_PA_CLIP_P_STARVED_BUSY, UINT64, AVERAGE),
+      COUNTABLE(VGT_PA_CLIP_P_STARVED_IDLE, UINT64, AVERAGE),
+      COUNTABLE(VGT_PA_CLIP_P_STATIC, UINT64, AVERAGE),
+      COUNTABLE(VGT_PA_CLIP_S_SEND, UINT64, AVERAGE),
+      COUNTABLE(VGT_PA_CLIP_S_STALLED, UINT64, AVERAGE),
+      COUNTABLE(VGT_PA_CLIP_S_STARVED_BUSY, UINT64, AVERAGE),
+      COUNTABLE(VGT_PA_CLIP_S_STARVED_IDLE, UINT64, AVERAGE),
+      COUNTABLE(VGT_PA_CLIP_S_STATIC, UINT64, AVERAGE),
+      COUNTABLE(RBIU_FIFOS_EVENT_WINDOW_ACTIVE, UINT64, AVERAGE),
+      COUNTABLE(RBIU_IMMED_DATA_FIFO_STARVED, UINT64, AVERAGE),
+      COUNTABLE(RBIU_IMMED_DATA_FIFO_STALLED, UINT64, AVERAGE),
+      COUNTABLE(RBIU_DMA_REQUEST_FIFO_STARVED, UINT64, AVERAGE),
+      COUNTABLE(RBIU_DMA_REQUEST_FIFO_STALLED, UINT64, AVERAGE),
+      COUNTABLE(RBIU_DRAW_INITIATOR_FIFO_STARVED, UINT64, AVERAGE),
+      COUNTABLE(RBIU_DRAW_INITIATOR_FIFO_STALLED, UINT64, AVERAGE),
+      COUNTABLE(BIN_PRIM_NEAR_CULL, UINT64, AVERAGE),
+      COUNTABLE(BIN_PRIM_ZERO_CULL, UINT64, AVERAGE),
+      COUNTABLE(BIN_PRIM_FAR_CULL, UINT64, AVERAGE),
+      COUNTABLE(BIN_PRIM_BIN_CULL, UINT64, AVERAGE),
+      COUNTABLE(BIN_PRIM_FACE_CULL, UINT64, AVERAGE),
+      COUNTABLE(SPARE34, UINT64, AVERAGE),
+      COUNTABLE(SPARE35, UINT64, AVERAGE),
+      COUNTABLE(SPARE36, UINT64, AVERAGE),
+      COUNTABLE(SPARE37, UINT64, AVERAGE),
+      COUNTABLE(SPARE38, UINT64, AVERAGE),
+      COUNTABLE(SPARE39, UINT64, AVERAGE),
+      COUNTABLE(TE_SU_IN_VALID, UINT64, AVERAGE),
+      COUNTABLE(TE_SU_IN_READ, UINT64, AVERAGE),
+      COUNTABLE(TE_SU_IN_PRIM, UINT64, AVERAGE),
+      COUNTABLE(TE_SU_IN_EOP, UINT64, AVERAGE),
+      COUNTABLE(TE_SU_IN_NULL_PRIM, UINT64, AVERAGE),
+      COUNTABLE(TE_WK_IN_VALID, UINT64, AVERAGE),
+      COUNTABLE(TE_WK_IN_READ, UINT64, AVERAGE),
+      COUNTABLE(TE_OUT_PRIM_VALID, UINT64, AVERAGE),
+      COUNTABLE(TE_OUT_PRIM_READ, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_countable tcr_countables[] = {
-       COUNTABLE(DGMMPD_IPMUX0_STALL, UINT64, AVERAGE),
-       COUNTABLE(DGMMPD_IPMUX_ALL_STALL, UINT64, AVERAGE),
-       COUNTABLE(OPMUX0_L2_WRITES, UINT64, AVERAGE),
+      COUNTABLE(DGMMPD_IPMUX0_STALL, UINT64, AVERAGE),
+      COUNTABLE(DGMMPD_IPMUX_ALL_STALL, UINT64, AVERAGE),
+      COUNTABLE(OPMUX0_L2_WRITES, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_countable tp0_countables[] = {
-       COUNTABLE(POINT_QUADS, UINT64, AVERAGE),
-       COUNTABLE(BILIN_QUADS, UINT64, AVERAGE),
-       COUNTABLE(ANISO_QUADS, UINT64, AVERAGE),
-       COUNTABLE(MIP_QUADS, UINT64, AVERAGE),
-       COUNTABLE(VOL_QUADS, UINT64, AVERAGE),
-       COUNTABLE(MIP_VOL_QUADS, UINT64, AVERAGE),
-       COUNTABLE(MIP_ANISO_QUADS, UINT64, AVERAGE),
-       COUNTABLE(VOL_ANISO_QUADS, UINT64, AVERAGE),
-       COUNTABLE(ANISO_2_1_QUADS, UINT64, AVERAGE),
-       COUNTABLE(ANISO_4_1_QUADS, UINT64, AVERAGE),
-       COUNTABLE(ANISO_6_1_QUADS, UINT64, AVERAGE),
-       COUNTABLE(ANISO_8_1_QUADS, UINT64, AVERAGE),
-       COUNTABLE(ANISO_10_1_QUADS, UINT64, AVERAGE),
-       COUNTABLE(ANISO_12_1_QUADS, UINT64, AVERAGE),
-       COUNTABLE(ANISO_14_1_QUADS, UINT64, AVERAGE),
-       COUNTABLE(ANISO_16_1_QUADS, UINT64, AVERAGE),
-       COUNTABLE(MIP_VOL_ANISO_QUADS, UINT64, AVERAGE),
-       COUNTABLE(ALIGN_2_QUADS, UINT64, AVERAGE),
-       COUNTABLE(ALIGN_4_QUADS, UINT64, AVERAGE),
-       COUNTABLE(PIX_0_QUAD, UINT64, AVERAGE),
-       COUNTABLE(PIX_1_QUAD, UINT64, AVERAGE),
-       COUNTABLE(PIX_2_QUAD, UINT64, AVERAGE),
-       COUNTABLE(PIX_3_QUAD, UINT64, AVERAGE),
-       COUNTABLE(PIX_4_QUAD, UINT64, AVERAGE),
-       COUNTABLE(TP_MIPMAP_LOD0, UINT64, AVERAGE),
-       COUNTABLE(TP_MIPMAP_LOD1, UINT64, AVERAGE),
-       COUNTABLE(TP_MIPMAP_LOD2, UINT64, AVERAGE),
-       COUNTABLE(TP_MIPMAP_LOD3, UINT64, AVERAGE),
-       COUNTABLE(TP_MIPMAP_LOD4, UINT64, AVERAGE),
-       COUNTABLE(TP_MIPMAP_LOD5, UINT64, AVERAGE),
-       COUNTABLE(TP_MIPMAP_LOD6, UINT64, AVERAGE),
-       COUNTABLE(TP_MIPMAP_LOD7, UINT64, AVERAGE),
-       COUNTABLE(TP_MIPMAP_LOD8, UINT64, AVERAGE),
-       COUNTABLE(TP_MIPMAP_LOD9, UINT64, AVERAGE),
-       COUNTABLE(TP_MIPMAP_LOD10, UINT64, AVERAGE),
-       COUNTABLE(TP_MIPMAP_LOD11, UINT64, AVERAGE),
-       COUNTABLE(TP_MIPMAP_LOD12, UINT64, AVERAGE),
-       COUNTABLE(TP_MIPMAP_LOD13, UINT64, AVERAGE),
-       COUNTABLE(TP_MIPMAP_LOD14, UINT64, AVERAGE),
+      COUNTABLE(POINT_QUADS, UINT64, AVERAGE),
+      COUNTABLE(BILIN_QUADS, UINT64, AVERAGE),
+      COUNTABLE(ANISO_QUADS, UINT64, AVERAGE),
+      COUNTABLE(MIP_QUADS, UINT64, AVERAGE),
+      COUNTABLE(VOL_QUADS, UINT64, AVERAGE),
+      COUNTABLE(MIP_VOL_QUADS, UINT64, AVERAGE),
+      COUNTABLE(MIP_ANISO_QUADS, UINT64, AVERAGE),
+      COUNTABLE(VOL_ANISO_QUADS, UINT64, AVERAGE),
+      COUNTABLE(ANISO_2_1_QUADS, UINT64, AVERAGE),
+      COUNTABLE(ANISO_4_1_QUADS, UINT64, AVERAGE),
+      COUNTABLE(ANISO_6_1_QUADS, UINT64, AVERAGE),
+      COUNTABLE(ANISO_8_1_QUADS, UINT64, AVERAGE),
+      COUNTABLE(ANISO_10_1_QUADS, UINT64, AVERAGE),
+      COUNTABLE(ANISO_12_1_QUADS, UINT64, AVERAGE),
+      COUNTABLE(ANISO_14_1_QUADS, UINT64, AVERAGE),
+      COUNTABLE(ANISO_16_1_QUADS, UINT64, AVERAGE),
+      COUNTABLE(MIP_VOL_ANISO_QUADS, UINT64, AVERAGE),
+      COUNTABLE(ALIGN_2_QUADS, UINT64, AVERAGE),
+      COUNTABLE(ALIGN_4_QUADS, UINT64, AVERAGE),
+      COUNTABLE(PIX_0_QUAD, UINT64, AVERAGE),
+      COUNTABLE(PIX_1_QUAD, UINT64, AVERAGE),
+      COUNTABLE(PIX_2_QUAD, UINT64, AVERAGE),
+      COUNTABLE(PIX_3_QUAD, UINT64, AVERAGE),
+      COUNTABLE(PIX_4_QUAD, UINT64, AVERAGE),
+      COUNTABLE(TP_MIPMAP_LOD0, UINT64, AVERAGE),
+      COUNTABLE(TP_MIPMAP_LOD1, UINT64, AVERAGE),
+      COUNTABLE(TP_MIPMAP_LOD2, UINT64, AVERAGE),
+      COUNTABLE(TP_MIPMAP_LOD3, UINT64, AVERAGE),
+      COUNTABLE(TP_MIPMAP_LOD4, UINT64, AVERAGE),
+      COUNTABLE(TP_MIPMAP_LOD5, UINT64, AVERAGE),
+      COUNTABLE(TP_MIPMAP_LOD6, UINT64, AVERAGE),
+      COUNTABLE(TP_MIPMAP_LOD7, UINT64, AVERAGE),
+      COUNTABLE(TP_MIPMAP_LOD8, UINT64, AVERAGE),
+      COUNTABLE(TP_MIPMAP_LOD9, UINT64, AVERAGE),
+      COUNTABLE(TP_MIPMAP_LOD10, UINT64, AVERAGE),
+      COUNTABLE(TP_MIPMAP_LOD11, UINT64, AVERAGE),
+      COUNTABLE(TP_MIPMAP_LOD12, UINT64, AVERAGE),
+      COUNTABLE(TP_MIPMAP_LOD13, UINT64, AVERAGE),
+      COUNTABLE(TP_MIPMAP_LOD14, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_countable tcm_countables[] = {
-       COUNTABLE(QUAD0_RD_LAT_FIFO_EMPTY, UINT64, AVERAGE),
-       COUNTABLE(QUAD0_RD_LAT_FIFO_4TH_FULL, UINT64, AVERAGE),
-       COUNTABLE(QUAD0_RD_LAT_FIFO_HALF_FULL, UINT64, AVERAGE),
-       COUNTABLE(QUAD0_RD_LAT_FIFO_FULL, UINT64, AVERAGE),
-       COUNTABLE(QUAD0_RD_LAT_FIFO_LT_4TH_FULL, UINT64, AVERAGE),
-       COUNTABLE(READ_STARVED_QUAD0, UINT64, AVERAGE),
-       COUNTABLE(READ_STARVED, UINT64, AVERAGE),
-       COUNTABLE(READ_STALLED_QUAD0, UINT64, AVERAGE),
-       COUNTABLE(READ_STALLED, UINT64, AVERAGE),
-       COUNTABLE(VALID_READ_QUAD0, UINT64, AVERAGE),
-       COUNTABLE(TC_TP_STARVED_QUAD0, UINT64, AVERAGE),
-       COUNTABLE(TC_TP_STARVED, UINT64, AVERAGE),
+      COUNTABLE(QUAD0_RD_LAT_FIFO_EMPTY, UINT64, AVERAGE),
+      COUNTABLE(QUAD0_RD_LAT_FIFO_4TH_FULL, UINT64, AVERAGE),
+      COUNTABLE(QUAD0_RD_LAT_FIFO_HALF_FULL, UINT64, AVERAGE),
+      COUNTABLE(QUAD0_RD_LAT_FIFO_FULL, UINT64, AVERAGE),
+      COUNTABLE(QUAD0_RD_LAT_FIFO_LT_4TH_FULL, UINT64, AVERAGE),
+      COUNTABLE(READ_STARVED_QUAD0, UINT64, AVERAGE),
+      COUNTABLE(READ_STARVED, UINT64, AVERAGE),
+      COUNTABLE(READ_STALLED_QUAD0, UINT64, AVERAGE),
+      COUNTABLE(READ_STALLED, UINT64, AVERAGE),
+      COUNTABLE(VALID_READ_QUAD0, UINT64, AVERAGE),
+      COUNTABLE(TC_TP_STARVED_QUAD0, UINT64, AVERAGE),
+      COUNTABLE(TC_TP_STARVED, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_countable tcf_countables[] = {
-       COUNTABLE(VALID_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(SINGLE_PHASES, UINT64, AVERAGE),
-       COUNTABLE(ANISO_PHASES, UINT64, AVERAGE),
-       COUNTABLE(MIP_PHASES, UINT64, AVERAGE),
-       COUNTABLE(VOL_PHASES, UINT64, AVERAGE),
-       COUNTABLE(MIP_VOL_PHASES, UINT64, AVERAGE),
-       COUNTABLE(MIP_ANISO_PHASES, UINT64, AVERAGE),
-       COUNTABLE(VOL_ANISO_PHASES, UINT64, AVERAGE),
-       COUNTABLE(ANISO_2_1_PHASES, UINT64, AVERAGE),
-       COUNTABLE(ANISO_4_1_PHASES, UINT64, AVERAGE),
-       COUNTABLE(ANISO_6_1_PHASES, UINT64, AVERAGE),
-       COUNTABLE(ANISO_8_1_PHASES, UINT64, AVERAGE),
-       COUNTABLE(ANISO_10_1_PHASES, UINT64, AVERAGE),
-       COUNTABLE(ANISO_12_1_PHASES, UINT64, AVERAGE),
-       COUNTABLE(ANISO_14_1_PHASES, UINT64, AVERAGE),
-       COUNTABLE(ANISO_16_1_PHASES, UINT64, AVERAGE),
-       COUNTABLE(MIP_VOL_ANISO_PHASES, UINT64, AVERAGE),
-       COUNTABLE(ALIGN_2_PHASES, UINT64, AVERAGE),
-       COUNTABLE(ALIGN_4_PHASES, UINT64, AVERAGE),
-       COUNTABLE(TPC_BUSY, UINT64, AVERAGE),
-       COUNTABLE(TPC_STALLED, UINT64, AVERAGE),
-       COUNTABLE(TPC_STARVED, UINT64, AVERAGE),
-       COUNTABLE(TPC_WORKING, UINT64, AVERAGE),
-       COUNTABLE(TPC_WALKER_BUSY, UINT64, AVERAGE),
-       COUNTABLE(TPC_WALKER_STALLED, UINT64, AVERAGE),
-       COUNTABLE(TPC_WALKER_WORKING, UINT64, AVERAGE),
-       COUNTABLE(TPC_ALIGNER_BUSY, UINT64, AVERAGE),
-       COUNTABLE(TPC_ALIGNER_STALLED, UINT64, AVERAGE),
-       COUNTABLE(TPC_ALIGNER_STALLED_BY_BLEND, UINT64, AVERAGE),
-       COUNTABLE(TPC_ALIGNER_STALLED_BY_CACHE, UINT64, AVERAGE),
-       COUNTABLE(TPC_ALIGNER_WORKING, UINT64, AVERAGE),
-       COUNTABLE(TPC_BLEND_BUSY, UINT64, AVERAGE),
-       COUNTABLE(TPC_BLEND_SYNC, UINT64, AVERAGE),
-       COUNTABLE(TPC_BLEND_STARVED, UINT64, AVERAGE),
-       COUNTABLE(TPC_BLEND_WORKING, UINT64, AVERAGE),
-       COUNTABLE(OPCODE_0x00, UINT64, AVERAGE),
-       COUNTABLE(OPCODE_0x01, UINT64, AVERAGE),
-       COUNTABLE(OPCODE_0x04, UINT64, AVERAGE),
-       COUNTABLE(OPCODE_0x10, UINT64, AVERAGE),
-       COUNTABLE(OPCODE_0x11, UINT64, AVERAGE),
-       COUNTABLE(OPCODE_0x12, UINT64, AVERAGE),
-       COUNTABLE(OPCODE_0x13, UINT64, AVERAGE),
-       COUNTABLE(OPCODE_0x18, UINT64, AVERAGE),
-       COUNTABLE(OPCODE_0x19, UINT64, AVERAGE),
-       COUNTABLE(OPCODE_0x1A, UINT64, AVERAGE),
-       COUNTABLE(OPCODE_OTHER, UINT64, AVERAGE),
-       COUNTABLE(IN_FIFO_0_EMPTY, UINT64, AVERAGE),
-       COUNTABLE(IN_FIFO_0_LT_HALF_FULL, UINT64, AVERAGE),
-       COUNTABLE(IN_FIFO_0_HALF_FULL, UINT64, AVERAGE),
-       COUNTABLE(IN_FIFO_0_FULL, UINT64, AVERAGE),
-       COUNTABLE(IN_FIFO_TPC_EMPTY, UINT64, AVERAGE),
-       COUNTABLE(IN_FIFO_TPC_LT_HALF_FULL, UINT64, AVERAGE),
-       COUNTABLE(IN_FIFO_TPC_HALF_FULL, UINT64, AVERAGE),
-       COUNTABLE(IN_FIFO_TPC_FULL, UINT64, AVERAGE),
-       COUNTABLE(TPC_TC_XFC, UINT64, AVERAGE),
-       COUNTABLE(TPC_TC_STATE, UINT64, AVERAGE),
-       COUNTABLE(TC_STALL, UINT64, AVERAGE),
-       COUNTABLE(QUAD0_TAPS, UINT64, AVERAGE),
-       COUNTABLE(QUADS, UINT64, AVERAGE),
-       COUNTABLE(TCA_SYNC_STALL, UINT64, AVERAGE),
-       COUNTABLE(TAG_STALL, UINT64, AVERAGE),
-       COUNTABLE(TCB_SYNC_STALL, UINT64, AVERAGE),
-       COUNTABLE(TCA_VALID, UINT64, AVERAGE),
-       COUNTABLE(PROBES_VALID, UINT64, AVERAGE),
-       COUNTABLE(MISS_STALL, UINT64, AVERAGE),
-       COUNTABLE(FETCH_FIFO_STALL, UINT64, AVERAGE),
-       COUNTABLE(TCO_STALL, UINT64, AVERAGE),
-       COUNTABLE(ANY_STALL, UINT64, AVERAGE),
-       COUNTABLE(TAG_MISSES, UINT64, AVERAGE),
-       COUNTABLE(TAG_HITS, UINT64, AVERAGE),
-       COUNTABLE(SUB_TAG_MISSES, UINT64, AVERAGE),
-       COUNTABLE(SET0_INVALIDATES, UINT64, AVERAGE),
-       COUNTABLE(SET1_INVALIDATES, UINT64, AVERAGE),
-       COUNTABLE(SET2_INVALIDATES, UINT64, AVERAGE),
-       COUNTABLE(SET3_INVALIDATES, UINT64, AVERAGE),
-       COUNTABLE(SET0_TAG_MISSES, UINT64, AVERAGE),
-       COUNTABLE(SET1_TAG_MISSES, UINT64, AVERAGE),
-       COUNTABLE(SET2_TAG_MISSES, UINT64, AVERAGE),
-       COUNTABLE(SET3_TAG_MISSES, UINT64, AVERAGE),
-       COUNTABLE(SET0_TAG_HITS, UINT64, AVERAGE),
-       COUNTABLE(SET1_TAG_HITS, UINT64, AVERAGE),
-       COUNTABLE(SET2_TAG_HITS, UINT64, AVERAGE),
-       COUNTABLE(SET3_TAG_HITS, UINT64, AVERAGE),
-       COUNTABLE(SET0_SUB_TAG_MISSES, UINT64, AVERAGE),
-       COUNTABLE(SET1_SUB_TAG_MISSES, UINT64, AVERAGE),
-       COUNTABLE(SET2_SUB_TAG_MISSES, UINT64, AVERAGE),
-       COUNTABLE(SET3_SUB_TAG_MISSES, UINT64, AVERAGE),
-       COUNTABLE(SET0_EVICT1, UINT64, AVERAGE),
-       COUNTABLE(SET0_EVICT2, UINT64, AVERAGE),
-       COUNTABLE(SET0_EVICT3, UINT64, AVERAGE),
-       COUNTABLE(SET0_EVICT4, UINT64, AVERAGE),
-       COUNTABLE(SET0_EVICT5, UINT64, AVERAGE),
-       COUNTABLE(SET0_EVICT6, UINT64, AVERAGE),
-       COUNTABLE(SET0_EVICT7, UINT64, AVERAGE),
-       COUNTABLE(SET0_EVICT8, UINT64, AVERAGE),
-       COUNTABLE(SET1_EVICT1, UINT64, AVERAGE),
-       COUNTABLE(SET1_EVICT2, UINT64, AVERAGE),
-       COUNTABLE(SET1_EVICT3, UINT64, AVERAGE),
-       COUNTABLE(SET1_EVICT4, UINT64, AVERAGE),
-       COUNTABLE(SET1_EVICT5, UINT64, AVERAGE),
-       COUNTABLE(SET1_EVICT6, UINT64, AVERAGE),
-       COUNTABLE(SET1_EVICT7, UINT64, AVERAGE),
-       COUNTABLE(SET1_EVICT8, UINT64, AVERAGE),
-       COUNTABLE(SET2_EVICT1, UINT64, AVERAGE),
-       COUNTABLE(SET2_EVICT2, UINT64, AVERAGE),
-       COUNTABLE(SET2_EVICT3, UINT64, AVERAGE),
-       COUNTABLE(SET2_EVICT4, UINT64, AVERAGE),
-       COUNTABLE(SET2_EVICT5, UINT64, AVERAGE),
-       COUNTABLE(SET2_EVICT6, UINT64, AVERAGE),
-       COUNTABLE(SET2_EVICT7, UINT64, AVERAGE),
-       COUNTABLE(SET2_EVICT8, UINT64, AVERAGE),
-       COUNTABLE(SET3_EVICT1, UINT64, AVERAGE),
-       COUNTABLE(SET3_EVICT2, UINT64, AVERAGE),
-       COUNTABLE(SET3_EVICT3, UINT64, AVERAGE),
-       COUNTABLE(SET3_EVICT4, UINT64, AVERAGE),
-       COUNTABLE(SET3_EVICT5, UINT64, AVERAGE),
-       COUNTABLE(SET3_EVICT6, UINT64, AVERAGE),
-       COUNTABLE(SET3_EVICT7, UINT64, AVERAGE),
-       COUNTABLE(SET3_EVICT8, UINT64, AVERAGE),
-       COUNTABLE(FF_EMPTY, UINT64, AVERAGE),
-       COUNTABLE(FF_LT_HALF_FULL, UINT64, AVERAGE),
-       COUNTABLE(FF_HALF_FULL, UINT64, AVERAGE),
-       COUNTABLE(FF_FULL, UINT64, AVERAGE),
-       COUNTABLE(FF_XFC, UINT64, AVERAGE),
-       COUNTABLE(FF_STALLED, UINT64, AVERAGE),
-       COUNTABLE(FG_MASKS, UINT64, AVERAGE),
-       COUNTABLE(FG_LEFT_MASKS, UINT64, AVERAGE),
-       COUNTABLE(FG_LEFT_MASK_STALLED, UINT64, AVERAGE),
-       COUNTABLE(FG_LEFT_NOT_DONE_STALL, UINT64, AVERAGE),
-       COUNTABLE(FG_LEFT_FG_STALL, UINT64, AVERAGE),
-       COUNTABLE(FG_LEFT_SECTORS, UINT64, AVERAGE),
-       COUNTABLE(FG0_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(FG0_STALLED, UINT64, AVERAGE),
-       COUNTABLE(MEM_REQ512, UINT64, AVERAGE),
-       COUNTABLE(MEM_REQ_SENT, UINT64, AVERAGE),
-       COUNTABLE(MEM_LOCAL_READ_REQ, UINT64, AVERAGE),
-       COUNTABLE(TC0_MH_STALLED, UINT64, AVERAGE),
+      COUNTABLE(VALID_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(SINGLE_PHASES, UINT64, AVERAGE),
+      COUNTABLE(ANISO_PHASES, UINT64, AVERAGE),
+      COUNTABLE(MIP_PHASES, UINT64, AVERAGE),
+      COUNTABLE(VOL_PHASES, UINT64, AVERAGE),
+      COUNTABLE(MIP_VOL_PHASES, UINT64, AVERAGE),
+      COUNTABLE(MIP_ANISO_PHASES, UINT64, AVERAGE),
+      COUNTABLE(VOL_ANISO_PHASES, UINT64, AVERAGE),
+      COUNTABLE(ANISO_2_1_PHASES, UINT64, AVERAGE),
+      COUNTABLE(ANISO_4_1_PHASES, UINT64, AVERAGE),
+      COUNTABLE(ANISO_6_1_PHASES, UINT64, AVERAGE),
+      COUNTABLE(ANISO_8_1_PHASES, UINT64, AVERAGE),
+      COUNTABLE(ANISO_10_1_PHASES, UINT64, AVERAGE),
+      COUNTABLE(ANISO_12_1_PHASES, UINT64, AVERAGE),
+      COUNTABLE(ANISO_14_1_PHASES, UINT64, AVERAGE),
+      COUNTABLE(ANISO_16_1_PHASES, UINT64, AVERAGE),
+      COUNTABLE(MIP_VOL_ANISO_PHASES, UINT64, AVERAGE),
+      COUNTABLE(ALIGN_2_PHASES, UINT64, AVERAGE),
+      COUNTABLE(ALIGN_4_PHASES, UINT64, AVERAGE),
+      COUNTABLE(TPC_BUSY, UINT64, AVERAGE),
+      COUNTABLE(TPC_STALLED, UINT64, AVERAGE),
+      COUNTABLE(TPC_STARVED, UINT64, AVERAGE),
+      COUNTABLE(TPC_WORKING, UINT64, AVERAGE),
+      COUNTABLE(TPC_WALKER_BUSY, UINT64, AVERAGE),
+      COUNTABLE(TPC_WALKER_STALLED, UINT64, AVERAGE),
+      COUNTABLE(TPC_WALKER_WORKING, UINT64, AVERAGE),
+      COUNTABLE(TPC_ALIGNER_BUSY, UINT64, AVERAGE),
+      COUNTABLE(TPC_ALIGNER_STALLED, UINT64, AVERAGE),
+      COUNTABLE(TPC_ALIGNER_STALLED_BY_BLEND, UINT64, AVERAGE),
+      COUNTABLE(TPC_ALIGNER_STALLED_BY_CACHE, UINT64, AVERAGE),
+      COUNTABLE(TPC_ALIGNER_WORKING, UINT64, AVERAGE),
+      COUNTABLE(TPC_BLEND_BUSY, UINT64, AVERAGE),
+      COUNTABLE(TPC_BLEND_SYNC, UINT64, AVERAGE),
+      COUNTABLE(TPC_BLEND_STARVED, UINT64, AVERAGE),
+      COUNTABLE(TPC_BLEND_WORKING, UINT64, AVERAGE),
+      COUNTABLE(OPCODE_0x00, UINT64, AVERAGE),
+      COUNTABLE(OPCODE_0x01, UINT64, AVERAGE),
+      COUNTABLE(OPCODE_0x04, UINT64, AVERAGE),
+      COUNTABLE(OPCODE_0x10, UINT64, AVERAGE),
+      COUNTABLE(OPCODE_0x11, UINT64, AVERAGE),
+      COUNTABLE(OPCODE_0x12, UINT64, AVERAGE),
+      COUNTABLE(OPCODE_0x13, UINT64, AVERAGE),
+      COUNTABLE(OPCODE_0x18, UINT64, AVERAGE),
+      COUNTABLE(OPCODE_0x19, UINT64, AVERAGE),
+      COUNTABLE(OPCODE_0x1A, UINT64, AVERAGE),
+      COUNTABLE(OPCODE_OTHER, UINT64, AVERAGE),
+      COUNTABLE(IN_FIFO_0_EMPTY, UINT64, AVERAGE),
+      COUNTABLE(IN_FIFO_0_LT_HALF_FULL, UINT64, AVERAGE),
+      COUNTABLE(IN_FIFO_0_HALF_FULL, UINT64, AVERAGE),
+      COUNTABLE(IN_FIFO_0_FULL, UINT64, AVERAGE),
+      COUNTABLE(IN_FIFO_TPC_EMPTY, UINT64, AVERAGE),
+      COUNTABLE(IN_FIFO_TPC_LT_HALF_FULL, UINT64, AVERAGE),
+      COUNTABLE(IN_FIFO_TPC_HALF_FULL, UINT64, AVERAGE),
+      COUNTABLE(IN_FIFO_TPC_FULL, UINT64, AVERAGE),
+      COUNTABLE(TPC_TC_XFC, UINT64, AVERAGE),
+      COUNTABLE(TPC_TC_STATE, UINT64, AVERAGE),
+      COUNTABLE(TC_STALL, UINT64, AVERAGE),
+      COUNTABLE(QUAD0_TAPS, UINT64, AVERAGE),
+      COUNTABLE(QUADS, UINT64, AVERAGE),
+      COUNTABLE(TCA_SYNC_STALL, UINT64, AVERAGE),
+      COUNTABLE(TAG_STALL, UINT64, AVERAGE),
+      COUNTABLE(TCB_SYNC_STALL, UINT64, AVERAGE),
+      COUNTABLE(TCA_VALID, UINT64, AVERAGE),
+      COUNTABLE(PROBES_VALID, UINT64, AVERAGE),
+      COUNTABLE(MISS_STALL, UINT64, AVERAGE),
+      COUNTABLE(FETCH_FIFO_STALL, UINT64, AVERAGE),
+      COUNTABLE(TCO_STALL, UINT64, AVERAGE),
+      COUNTABLE(ANY_STALL, UINT64, AVERAGE),
+      COUNTABLE(TAG_MISSES, UINT64, AVERAGE),
+      COUNTABLE(TAG_HITS, UINT64, AVERAGE),
+      COUNTABLE(SUB_TAG_MISSES, UINT64, AVERAGE),
+      COUNTABLE(SET0_INVALIDATES, UINT64, AVERAGE),
+      COUNTABLE(SET1_INVALIDATES, UINT64, AVERAGE),
+      COUNTABLE(SET2_INVALIDATES, UINT64, AVERAGE),
+      COUNTABLE(SET3_INVALIDATES, UINT64, AVERAGE),
+      COUNTABLE(SET0_TAG_MISSES, UINT64, AVERAGE),
+      COUNTABLE(SET1_TAG_MISSES, UINT64, AVERAGE),
+      COUNTABLE(SET2_TAG_MISSES, UINT64, AVERAGE),
+      COUNTABLE(SET3_TAG_MISSES, UINT64, AVERAGE),
+      COUNTABLE(SET0_TAG_HITS, UINT64, AVERAGE),
+      COUNTABLE(SET1_TAG_HITS, UINT64, AVERAGE),
+      COUNTABLE(SET2_TAG_HITS, UINT64, AVERAGE),
+      COUNTABLE(SET3_TAG_HITS, UINT64, AVERAGE),
+      COUNTABLE(SET0_SUB_TAG_MISSES, UINT64, AVERAGE),
+      COUNTABLE(SET1_SUB_TAG_MISSES, UINT64, AVERAGE),
+      COUNTABLE(SET2_SUB_TAG_MISSES, UINT64, AVERAGE),
+      COUNTABLE(SET3_SUB_TAG_MISSES, UINT64, AVERAGE),
+      COUNTABLE(SET0_EVICT1, UINT64, AVERAGE),
+      COUNTABLE(SET0_EVICT2, UINT64, AVERAGE),
+      COUNTABLE(SET0_EVICT3, UINT64, AVERAGE),
+      COUNTABLE(SET0_EVICT4, UINT64, AVERAGE),
+      COUNTABLE(SET0_EVICT5, UINT64, AVERAGE),
+      COUNTABLE(SET0_EVICT6, UINT64, AVERAGE),
+      COUNTABLE(SET0_EVICT7, UINT64, AVERAGE),
+      COUNTABLE(SET0_EVICT8, UINT64, AVERAGE),
+      COUNTABLE(SET1_EVICT1, UINT64, AVERAGE),
+      COUNTABLE(SET1_EVICT2, UINT64, AVERAGE),
+      COUNTABLE(SET1_EVICT3, UINT64, AVERAGE),
+      COUNTABLE(SET1_EVICT4, UINT64, AVERAGE),
+      COUNTABLE(SET1_EVICT5, UINT64, AVERAGE),
+      COUNTABLE(SET1_EVICT6, UINT64, AVERAGE),
+      COUNTABLE(SET1_EVICT7, UINT64, AVERAGE),
+      COUNTABLE(SET1_EVICT8, UINT64, AVERAGE),
+      COUNTABLE(SET2_EVICT1, UINT64, AVERAGE),
+      COUNTABLE(SET2_EVICT2, UINT64, AVERAGE),
+      COUNTABLE(SET2_EVICT3, UINT64, AVERAGE),
+      COUNTABLE(SET2_EVICT4, UINT64, AVERAGE),
+      COUNTABLE(SET2_EVICT5, UINT64, AVERAGE),
+      COUNTABLE(SET2_EVICT6, UINT64, AVERAGE),
+      COUNTABLE(SET2_EVICT7, UINT64, AVERAGE),
+      COUNTABLE(SET2_EVICT8, UINT64, AVERAGE),
+      COUNTABLE(SET3_EVICT1, UINT64, AVERAGE),
+      COUNTABLE(SET3_EVICT2, UINT64, AVERAGE),
+      COUNTABLE(SET3_EVICT3, UINT64, AVERAGE),
+      COUNTABLE(SET3_EVICT4, UINT64, AVERAGE),
+      COUNTABLE(SET3_EVICT5, UINT64, AVERAGE),
+      COUNTABLE(SET3_EVICT6, UINT64, AVERAGE),
+      COUNTABLE(SET3_EVICT7, UINT64, AVERAGE),
+      COUNTABLE(SET3_EVICT8, UINT64, AVERAGE),
+      COUNTABLE(FF_EMPTY, UINT64, AVERAGE),
+      COUNTABLE(FF_LT_HALF_FULL, UINT64, AVERAGE),
+      COUNTABLE(FF_HALF_FULL, UINT64, AVERAGE),
+      COUNTABLE(FF_FULL, UINT64, AVERAGE),
+      COUNTABLE(FF_XFC, UINT64, AVERAGE),
+      COUNTABLE(FF_STALLED, UINT64, AVERAGE),
+      COUNTABLE(FG_MASKS, UINT64, AVERAGE),
+      COUNTABLE(FG_LEFT_MASKS, UINT64, AVERAGE),
+      COUNTABLE(FG_LEFT_MASK_STALLED, UINT64, AVERAGE),
+      COUNTABLE(FG_LEFT_NOT_DONE_STALL, UINT64, AVERAGE),
+      COUNTABLE(FG_LEFT_FG_STALL, UINT64, AVERAGE),
+      COUNTABLE(FG_LEFT_SECTORS, UINT64, AVERAGE),
+      COUNTABLE(FG0_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(FG0_STALLED, UINT64, AVERAGE),
+      COUNTABLE(MEM_REQ512, UINT64, AVERAGE),
+      COUNTABLE(MEM_REQ_SENT, UINT64, AVERAGE),
+      COUNTABLE(MEM_LOCAL_READ_REQ, UINT64, AVERAGE),
+      COUNTABLE(TC0_MH_STALLED, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_countable sq_countables[] = {
-       COUNTABLE(SQ_PIXEL_VECTORS_SUB, UINT64, AVERAGE),
-       COUNTABLE(SQ_VERTEX_VECTORS_SUB, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD0, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU1_ACTIVE_VTX_SIMD0, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD0, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU1_ACTIVE_PIX_SIMD0, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD1, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU1_ACTIVE_VTX_SIMD1, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD1, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU1_ACTIVE_PIX_SIMD1, UINT64, AVERAGE),
-       COUNTABLE(SQ_EXPORT_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU_CST_WRITTEN, UINT64, AVERAGE),
-       COUNTABLE(SQ_TEX_CST_WRITTEN, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU_CST_STALL, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU_TEX_STALL, UINT64, AVERAGE),
-       COUNTABLE(SQ_INST_WRITTEN, UINT64, AVERAGE),
-       COUNTABLE(SQ_BOOLEAN_WRITTEN, UINT64, AVERAGE),
-       COUNTABLE(SQ_LOOPS_WRITTEN, UINT64, AVERAGE),
-       COUNTABLE(SQ_PIXEL_SWAP_IN, UINT64, AVERAGE),
-       COUNTABLE(SQ_PIXEL_SWAP_OUT, UINT64, AVERAGE),
-       COUNTABLE(SQ_VERTEX_SWAP_IN, UINT64, AVERAGE),
-       COUNTABLE(SQ_VERTEX_SWAP_OUT, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU_VTX_INST_ISSUED, UINT64, AVERAGE),
-       COUNTABLE(SQ_TEX_VTX_INST_ISSUED, UINT64, AVERAGE),
-       COUNTABLE(SQ_VC_VTX_INST_ISSUED, UINT64, AVERAGE),
-       COUNTABLE(SQ_CF_VTX_INST_ISSUED, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU_PIX_INST_ISSUED, UINT64, AVERAGE),
-       COUNTABLE(SQ_TEX_PIX_INST_ISSUED, UINT64, AVERAGE),
-       COUNTABLE(SQ_VC_PIX_INST_ISSUED, UINT64, AVERAGE),
-       COUNTABLE(SQ_CF_PIX_INST_ISSUED, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD0, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD0, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD1, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD1, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU_NOPS, UINT64, AVERAGE),
-       COUNTABLE(SQ_PRED_SKIP, UINT64, AVERAGE),
-       COUNTABLE(SQ_SYNC_ALU_STALL_SIMD0_VTX, UINT64, AVERAGE),
-       COUNTABLE(SQ_SYNC_ALU_STALL_SIMD1_VTX, UINT64, AVERAGE),
-       COUNTABLE(SQ_SYNC_TEX_STALL_VTX, UINT64, AVERAGE),
-       COUNTABLE(SQ_SYNC_VC_STALL_VTX, UINT64, AVERAGE),
-       COUNTABLE(SQ_CONSTANTS_USED_SIMD0, UINT64, AVERAGE),
-       COUNTABLE(SQ_CONSTANTS_SENT_SP_SIMD0, UINT64, AVERAGE),
-       COUNTABLE(SQ_GPR_STALL_VTX, UINT64, AVERAGE),
-       COUNTABLE(SQ_GPR_STALL_PIX, UINT64, AVERAGE),
-       COUNTABLE(SQ_VTX_RS_STALL, UINT64, AVERAGE),
-       COUNTABLE(SQ_PIX_RS_STALL, UINT64, AVERAGE),
-       COUNTABLE(SQ_SX_PC_FULL, UINT64, AVERAGE),
-       COUNTABLE(SQ_SX_EXP_BUFF_FULL, UINT64, AVERAGE),
-       COUNTABLE(SQ_SX_POS_BUFF_FULL, UINT64, AVERAGE),
-       COUNTABLE(SQ_INTERP_QUADS, UINT64, AVERAGE),
-       COUNTABLE(SQ_INTERP_ACTIVE, UINT64, AVERAGE),
-       COUNTABLE(SQ_IN_PIXEL_STALL, UINT64, AVERAGE),
-       COUNTABLE(SQ_IN_VTX_STALL, UINT64, AVERAGE),
-       COUNTABLE(SQ_VTX_CNT, UINT64, AVERAGE),
-       COUNTABLE(SQ_VTX_VECTOR2, UINT64, AVERAGE),
-       COUNTABLE(SQ_VTX_VECTOR3, UINT64, AVERAGE),
-       COUNTABLE(SQ_VTX_VECTOR4, UINT64, AVERAGE),
-       COUNTABLE(SQ_PIXEL_VECTOR1, UINT64, AVERAGE),
-       COUNTABLE(SQ_PIXEL_VECTOR23, UINT64, AVERAGE),
-       COUNTABLE(SQ_PIXEL_VECTOR4, UINT64, AVERAGE),
-       COUNTABLE(SQ_CONSTANTS_USED_SIMD1, UINT64, AVERAGE),
-       COUNTABLE(SQ_CONSTANTS_SENT_SP_SIMD1, UINT64, AVERAGE),
-       COUNTABLE(SQ_SX_MEM_EXP_FULL, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD2, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU1_ACTIVE_VTX_SIMD2, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD2, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU1_ACTIVE_PIX_SIMD2, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD3, UINT64, AVERAGE),
-       COUNTABLE(SQ_PERFCOUNT_VTX_QUAL_TP_DONE, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD3, UINT64, AVERAGE),
-       COUNTABLE(SQ_PERFCOUNT_PIX_QUAL_TP_DONE, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD2, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD2, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD3, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD3, UINT64, AVERAGE),
-       COUNTABLE(SQ_SYNC_ALU_STALL_SIMD2_VTX, UINT64, AVERAGE),
-       COUNTABLE(SQ_PERFCOUNT_VTX_POP_THREAD, UINT64, AVERAGE),
-       COUNTABLE(SQ_SYNC_ALU_STALL_SIMD0_PIX, UINT64, AVERAGE),
-       COUNTABLE(SQ_SYNC_ALU_STALL_SIMD1_PIX, UINT64, AVERAGE),
-       COUNTABLE(SQ_SYNC_ALU_STALL_SIMD2_PIX, UINT64, AVERAGE),
-       COUNTABLE(SQ_PERFCOUNT_PIX_POP_THREAD, UINT64, AVERAGE),
-       COUNTABLE(SQ_SYNC_TEX_STALL_PIX, UINT64, AVERAGE),
-       COUNTABLE(SQ_SYNC_VC_STALL_PIX, UINT64, AVERAGE),
-       COUNTABLE(SQ_CONSTANTS_USED_SIMD2, UINT64, AVERAGE),
-       COUNTABLE(SQ_CONSTANTS_SENT_SP_SIMD2, UINT64, AVERAGE),
-       COUNTABLE(SQ_PERFCOUNT_VTX_DEALLOC_ACK, UINT64, AVERAGE),
-       COUNTABLE(SQ_PERFCOUNT_PIX_DEALLOC_ACK, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD0, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD0, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD1, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD1, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD2, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD2, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD3, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD3, UINT64, AVERAGE),
-       COUNTABLE(VC_PERF_STATIC, UINT64, AVERAGE),
-       COUNTABLE(VC_PERF_STALLED, UINT64, AVERAGE),
-       COUNTABLE(VC_PERF_STARVED, UINT64, AVERAGE),
-       COUNTABLE(VC_PERF_SEND, UINT64, AVERAGE),
-       COUNTABLE(VC_PERF_ACTUAL_STARVED, UINT64, AVERAGE),
-       COUNTABLE(PIXEL_THREAD_0_ACTIVE, UINT64, AVERAGE),
-       COUNTABLE(VERTEX_THREAD_0_ACTIVE, UINT64, AVERAGE),
-       COUNTABLE(PIXEL_THREAD_0_NUMBER, UINT64, AVERAGE),
-       COUNTABLE(VERTEX_THREAD_0_NUMBER, UINT64, AVERAGE),
-       COUNTABLE(VERTEX_EVENT_NUMBER, UINT64, AVERAGE),
-       COUNTABLE(PIXEL_EVENT_NUMBER, UINT64, AVERAGE),
-       COUNTABLE(PTRBUFF_EF_PUSH, UINT64, AVERAGE),
-       COUNTABLE(PTRBUFF_EF_POP_EVENT, UINT64, AVERAGE),
-       COUNTABLE(PTRBUFF_EF_POP_NEW_VTX, UINT64, AVERAGE),
-       COUNTABLE(PTRBUFF_EF_POP_DEALLOC, UINT64, AVERAGE),
-       COUNTABLE(PTRBUFF_EF_POP_PVECTOR, UINT64, AVERAGE),
-       COUNTABLE(PTRBUFF_EF_POP_PVECTOR_X, UINT64, AVERAGE),
-       COUNTABLE(PTRBUFF_EF_POP_PVECTOR_VNZ, UINT64, AVERAGE),
-       COUNTABLE(PTRBUFF_PB_DEALLOC, UINT64, AVERAGE),
-       COUNTABLE(PTRBUFF_PI_STATE_PPB_POP, UINT64, AVERAGE),
-       COUNTABLE(PTRBUFF_PI_RTR, UINT64, AVERAGE),
-       COUNTABLE(PTRBUFF_PI_READ_EN, UINT64, AVERAGE),
-       COUNTABLE(PTRBUFF_PI_BUFF_SWAP, UINT64, AVERAGE),
-       COUNTABLE(PTRBUFF_SQ_FREE_BUFF, UINT64, AVERAGE),
-       COUNTABLE(PTRBUFF_SQ_DEC, UINT64, AVERAGE),
-       COUNTABLE(PTRBUFF_SC_VALID_CNTL_EVENT, UINT64, AVERAGE),
-       COUNTABLE(PTRBUFF_SC_VALID_IJ_XFER, UINT64, AVERAGE),
-       COUNTABLE(PTRBUFF_SC_NEW_VECTOR_1_Q, UINT64, AVERAGE),
-       COUNTABLE(PTRBUFF_QUAL_NEW_VECTOR, UINT64, AVERAGE),
-       COUNTABLE(PTRBUFF_QUAL_EVENT, UINT64, AVERAGE),
-       COUNTABLE(PTRBUFF_END_BUFFER, UINT64, AVERAGE),
-       COUNTABLE(PTRBUFF_FILL_QUAD, UINT64, AVERAGE),
-       COUNTABLE(VERTS_WRITTEN_SPI, UINT64, AVERAGE),
-       COUNTABLE(TP_FETCH_INSTR_EXEC, UINT64, AVERAGE),
-       COUNTABLE(TP_FETCH_INSTR_REQ, UINT64, AVERAGE),
-       COUNTABLE(TP_DATA_RETURN, UINT64, AVERAGE),
-       COUNTABLE(SPI_WRITE_CYCLES_SP, UINT64, AVERAGE),
-       COUNTABLE(SPI_WRITES_SP, UINT64, AVERAGE),
-       COUNTABLE(SP_ALU_INSTR_EXEC, UINT64, AVERAGE),
-       COUNTABLE(SP_CONST_ADDR_TO_SQ, UINT64, AVERAGE),
-       COUNTABLE(SP_PRED_KILLS_TO_SQ, UINT64, AVERAGE),
-       COUNTABLE(SP_EXPORT_CYCLES_TO_SX, UINT64, AVERAGE),
-       COUNTABLE(SP_EXPORTS_TO_SX, UINT64, AVERAGE),
-       COUNTABLE(SQ_CYCLES_ELAPSED, UINT64, AVERAGE),
-       COUNTABLE(SQ_TCFS_OPT_ALLOC_EXEC, UINT64, AVERAGE),
-       COUNTABLE(SQ_TCFS_NO_OPT_ALLOC, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU0_NO_OPT_ALLOC, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU1_NO_OPT_ALLOC, UINT64, AVERAGE),
-       COUNTABLE(SQ_TCFS_ARB_XFC_CNT, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU0_ARB_XFC_CNT, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU1_ARB_XFC_CNT, UINT64, AVERAGE),
-       COUNTABLE(SQ_TCFS_CFS_UPDATE_CNT, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU0_CFS_UPDATE_CNT, UINT64, AVERAGE),
-       COUNTABLE(SQ_ALU1_CFS_UPDATE_CNT, UINT64, AVERAGE),
-       COUNTABLE(SQ_VTX_PUSH_THREAD_CNT, UINT64, AVERAGE),
-       COUNTABLE(SQ_VTX_POP_THREAD_CNT, UINT64, AVERAGE),
-       COUNTABLE(SQ_PIX_PUSH_THREAD_CNT, UINT64, AVERAGE),
-       COUNTABLE(SQ_PIX_POP_THREAD_CNT, UINT64, AVERAGE),
-       COUNTABLE(SQ_PIX_TOTAL, UINT64, AVERAGE),
-       COUNTABLE(SQ_PIX_KILLED, UINT64, AVERAGE),
+      COUNTABLE(SQ_PIXEL_VECTORS_SUB, UINT64, AVERAGE),
+      COUNTABLE(SQ_VERTEX_VECTORS_SUB, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD0, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU1_ACTIVE_VTX_SIMD0, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD0, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU1_ACTIVE_PIX_SIMD0, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD1, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU1_ACTIVE_VTX_SIMD1, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD1, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU1_ACTIVE_PIX_SIMD1, UINT64, AVERAGE),
+      COUNTABLE(SQ_EXPORT_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU_CST_WRITTEN, UINT64, AVERAGE),
+      COUNTABLE(SQ_TEX_CST_WRITTEN, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU_CST_STALL, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU_TEX_STALL, UINT64, AVERAGE),
+      COUNTABLE(SQ_INST_WRITTEN, UINT64, AVERAGE),
+      COUNTABLE(SQ_BOOLEAN_WRITTEN, UINT64, AVERAGE),
+      COUNTABLE(SQ_LOOPS_WRITTEN, UINT64, AVERAGE),
+      COUNTABLE(SQ_PIXEL_SWAP_IN, UINT64, AVERAGE),
+      COUNTABLE(SQ_PIXEL_SWAP_OUT, UINT64, AVERAGE),
+      COUNTABLE(SQ_VERTEX_SWAP_IN, UINT64, AVERAGE),
+      COUNTABLE(SQ_VERTEX_SWAP_OUT, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU_VTX_INST_ISSUED, UINT64, AVERAGE),
+      COUNTABLE(SQ_TEX_VTX_INST_ISSUED, UINT64, AVERAGE),
+      COUNTABLE(SQ_VC_VTX_INST_ISSUED, UINT64, AVERAGE),
+      COUNTABLE(SQ_CF_VTX_INST_ISSUED, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU_PIX_INST_ISSUED, UINT64, AVERAGE),
+      COUNTABLE(SQ_TEX_PIX_INST_ISSUED, UINT64, AVERAGE),
+      COUNTABLE(SQ_VC_PIX_INST_ISSUED, UINT64, AVERAGE),
+      COUNTABLE(SQ_CF_PIX_INST_ISSUED, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD0, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD0, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD1, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD1, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU_NOPS, UINT64, AVERAGE),
+      COUNTABLE(SQ_PRED_SKIP, UINT64, AVERAGE),
+      COUNTABLE(SQ_SYNC_ALU_STALL_SIMD0_VTX, UINT64, AVERAGE),
+      COUNTABLE(SQ_SYNC_ALU_STALL_SIMD1_VTX, UINT64, AVERAGE),
+      COUNTABLE(SQ_SYNC_TEX_STALL_VTX, UINT64, AVERAGE),
+      COUNTABLE(SQ_SYNC_VC_STALL_VTX, UINT64, AVERAGE),
+      COUNTABLE(SQ_CONSTANTS_USED_SIMD0, UINT64, AVERAGE),
+      COUNTABLE(SQ_CONSTANTS_SENT_SP_SIMD0, UINT64, AVERAGE),
+      COUNTABLE(SQ_GPR_STALL_VTX, UINT64, AVERAGE),
+      COUNTABLE(SQ_GPR_STALL_PIX, UINT64, AVERAGE),
+      COUNTABLE(SQ_VTX_RS_STALL, UINT64, AVERAGE),
+      COUNTABLE(SQ_PIX_RS_STALL, UINT64, AVERAGE),
+      COUNTABLE(SQ_SX_PC_FULL, UINT64, AVERAGE),
+      COUNTABLE(SQ_SX_EXP_BUFF_FULL, UINT64, AVERAGE),
+      COUNTABLE(SQ_SX_POS_BUFF_FULL, UINT64, AVERAGE),
+      COUNTABLE(SQ_INTERP_QUADS, UINT64, AVERAGE),
+      COUNTABLE(SQ_INTERP_ACTIVE, UINT64, AVERAGE),
+      COUNTABLE(SQ_IN_PIXEL_STALL, UINT64, AVERAGE),
+      COUNTABLE(SQ_IN_VTX_STALL, UINT64, AVERAGE),
+      COUNTABLE(SQ_VTX_CNT, UINT64, AVERAGE),
+      COUNTABLE(SQ_VTX_VECTOR2, UINT64, AVERAGE),
+      COUNTABLE(SQ_VTX_VECTOR3, UINT64, AVERAGE),
+      COUNTABLE(SQ_VTX_VECTOR4, UINT64, AVERAGE),
+      COUNTABLE(SQ_PIXEL_VECTOR1, UINT64, AVERAGE),
+      COUNTABLE(SQ_PIXEL_VECTOR23, UINT64, AVERAGE),
+      COUNTABLE(SQ_PIXEL_VECTOR4, UINT64, AVERAGE),
+      COUNTABLE(SQ_CONSTANTS_USED_SIMD1, UINT64, AVERAGE),
+      COUNTABLE(SQ_CONSTANTS_SENT_SP_SIMD1, UINT64, AVERAGE),
+      COUNTABLE(SQ_SX_MEM_EXP_FULL, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD2, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU1_ACTIVE_VTX_SIMD2, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD2, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU1_ACTIVE_PIX_SIMD2, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD3, UINT64, AVERAGE),
+      COUNTABLE(SQ_PERFCOUNT_VTX_QUAL_TP_DONE, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD3, UINT64, AVERAGE),
+      COUNTABLE(SQ_PERFCOUNT_PIX_QUAL_TP_DONE, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD2, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD2, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD3, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD3, UINT64, AVERAGE),
+      COUNTABLE(SQ_SYNC_ALU_STALL_SIMD2_VTX, UINT64, AVERAGE),
+      COUNTABLE(SQ_PERFCOUNT_VTX_POP_THREAD, UINT64, AVERAGE),
+      COUNTABLE(SQ_SYNC_ALU_STALL_SIMD0_PIX, UINT64, AVERAGE),
+      COUNTABLE(SQ_SYNC_ALU_STALL_SIMD1_PIX, UINT64, AVERAGE),
+      COUNTABLE(SQ_SYNC_ALU_STALL_SIMD2_PIX, UINT64, AVERAGE),
+      COUNTABLE(SQ_PERFCOUNT_PIX_POP_THREAD, UINT64, AVERAGE),
+      COUNTABLE(SQ_SYNC_TEX_STALL_PIX, UINT64, AVERAGE),
+      COUNTABLE(SQ_SYNC_VC_STALL_PIX, UINT64, AVERAGE),
+      COUNTABLE(SQ_CONSTANTS_USED_SIMD2, UINT64, AVERAGE),
+      COUNTABLE(SQ_CONSTANTS_SENT_SP_SIMD2, UINT64, AVERAGE),
+      COUNTABLE(SQ_PERFCOUNT_VTX_DEALLOC_ACK, UINT64, AVERAGE),
+      COUNTABLE(SQ_PERFCOUNT_PIX_DEALLOC_ACK, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD0, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD0, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD1, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD1, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD2, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD2, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD3, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD3, UINT64, AVERAGE),
+      COUNTABLE(VC_PERF_STATIC, UINT64, AVERAGE),
+      COUNTABLE(VC_PERF_STALLED, UINT64, AVERAGE),
+      COUNTABLE(VC_PERF_STARVED, UINT64, AVERAGE),
+      COUNTABLE(VC_PERF_SEND, UINT64, AVERAGE),
+      COUNTABLE(VC_PERF_ACTUAL_STARVED, UINT64, AVERAGE),
+      COUNTABLE(PIXEL_THREAD_0_ACTIVE, UINT64, AVERAGE),
+      COUNTABLE(VERTEX_THREAD_0_ACTIVE, UINT64, AVERAGE),
+      COUNTABLE(PIXEL_THREAD_0_NUMBER, UINT64, AVERAGE),
+      COUNTABLE(VERTEX_THREAD_0_NUMBER, UINT64, AVERAGE),
+      COUNTABLE(VERTEX_EVENT_NUMBER, UINT64, AVERAGE),
+      COUNTABLE(PIXEL_EVENT_NUMBER, UINT64, AVERAGE),
+      COUNTABLE(PTRBUFF_EF_PUSH, UINT64, AVERAGE),
+      COUNTABLE(PTRBUFF_EF_POP_EVENT, UINT64, AVERAGE),
+      COUNTABLE(PTRBUFF_EF_POP_NEW_VTX, UINT64, AVERAGE),
+      COUNTABLE(PTRBUFF_EF_POP_DEALLOC, UINT64, AVERAGE),
+      COUNTABLE(PTRBUFF_EF_POP_PVECTOR, UINT64, AVERAGE),
+      COUNTABLE(PTRBUFF_EF_POP_PVECTOR_X, UINT64, AVERAGE),
+      COUNTABLE(PTRBUFF_EF_POP_PVECTOR_VNZ, UINT64, AVERAGE),
+      COUNTABLE(PTRBUFF_PB_DEALLOC, UINT64, AVERAGE),
+      COUNTABLE(PTRBUFF_PI_STATE_PPB_POP, UINT64, AVERAGE),
+      COUNTABLE(PTRBUFF_PI_RTR, UINT64, AVERAGE),
+      COUNTABLE(PTRBUFF_PI_READ_EN, UINT64, AVERAGE),
+      COUNTABLE(PTRBUFF_PI_BUFF_SWAP, UINT64, AVERAGE),
+      COUNTABLE(PTRBUFF_SQ_FREE_BUFF, UINT64, AVERAGE),
+      COUNTABLE(PTRBUFF_SQ_DEC, UINT64, AVERAGE),
+      COUNTABLE(PTRBUFF_SC_VALID_CNTL_EVENT, UINT64, AVERAGE),
+      COUNTABLE(PTRBUFF_SC_VALID_IJ_XFER, UINT64, AVERAGE),
+      COUNTABLE(PTRBUFF_SC_NEW_VECTOR_1_Q, UINT64, AVERAGE),
+      COUNTABLE(PTRBUFF_QUAL_NEW_VECTOR, UINT64, AVERAGE),
+      COUNTABLE(PTRBUFF_QUAL_EVENT, UINT64, AVERAGE),
+      COUNTABLE(PTRBUFF_END_BUFFER, UINT64, AVERAGE),
+      COUNTABLE(PTRBUFF_FILL_QUAD, UINT64, AVERAGE),
+      COUNTABLE(VERTS_WRITTEN_SPI, UINT64, AVERAGE),
+      COUNTABLE(TP_FETCH_INSTR_EXEC, UINT64, AVERAGE),
+      COUNTABLE(TP_FETCH_INSTR_REQ, UINT64, AVERAGE),
+      COUNTABLE(TP_DATA_RETURN, UINT64, AVERAGE),
+      COUNTABLE(SPI_WRITE_CYCLES_SP, UINT64, AVERAGE),
+      COUNTABLE(SPI_WRITES_SP, UINT64, AVERAGE),
+      COUNTABLE(SP_ALU_INSTR_EXEC, UINT64, AVERAGE),
+      COUNTABLE(SP_CONST_ADDR_TO_SQ, UINT64, AVERAGE),
+      COUNTABLE(SP_PRED_KILLS_TO_SQ, UINT64, AVERAGE),
+      COUNTABLE(SP_EXPORT_CYCLES_TO_SX, UINT64, AVERAGE),
+      COUNTABLE(SP_EXPORTS_TO_SX, UINT64, AVERAGE),
+      COUNTABLE(SQ_CYCLES_ELAPSED, UINT64, AVERAGE),
+      COUNTABLE(SQ_TCFS_OPT_ALLOC_EXEC, UINT64, AVERAGE),
+      COUNTABLE(SQ_TCFS_NO_OPT_ALLOC, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU0_NO_OPT_ALLOC, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU1_NO_OPT_ALLOC, UINT64, AVERAGE),
+      COUNTABLE(SQ_TCFS_ARB_XFC_CNT, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU0_ARB_XFC_CNT, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU1_ARB_XFC_CNT, UINT64, AVERAGE),
+      COUNTABLE(SQ_TCFS_CFS_UPDATE_CNT, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU0_CFS_UPDATE_CNT, UINT64, AVERAGE),
+      COUNTABLE(SQ_ALU1_CFS_UPDATE_CNT, UINT64, AVERAGE),
+      COUNTABLE(SQ_VTX_PUSH_THREAD_CNT, UINT64, AVERAGE),
+      COUNTABLE(SQ_VTX_POP_THREAD_CNT, UINT64, AVERAGE),
+      COUNTABLE(SQ_PIX_PUSH_THREAD_CNT, UINT64, AVERAGE),
+      COUNTABLE(SQ_PIX_POP_THREAD_CNT, UINT64, AVERAGE),
+      COUNTABLE(SQ_PIX_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(SQ_PIX_KILLED, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_countable sx_countables[] = {
-       COUNTABLE(SX_EXPORT_VECTORS, UINT64, AVERAGE),
-       COUNTABLE(SX_DUMMY_QUADS, UINT64, AVERAGE),
-       COUNTABLE(SX_ALPHA_FAIL, UINT64, AVERAGE),
-       COUNTABLE(SX_RB_QUAD_BUSY, UINT64, AVERAGE),
-       COUNTABLE(SX_RB_COLOR_BUSY, UINT64, AVERAGE),
-       COUNTABLE(SX_RB_QUAD_STALL, UINT64, AVERAGE),
-       COUNTABLE(SX_RB_COLOR_STALL, UINT64, AVERAGE),
+      COUNTABLE(SX_EXPORT_VECTORS, UINT64, AVERAGE),
+      COUNTABLE(SX_DUMMY_QUADS, UINT64, AVERAGE),
+      COUNTABLE(SX_ALPHA_FAIL, UINT64, AVERAGE),
+      COUNTABLE(SX_RB_QUAD_BUSY, UINT64, AVERAGE),
+      COUNTABLE(SX_RB_COLOR_BUSY, UINT64, AVERAGE),
+      COUNTABLE(SX_RB_QUAD_STALL, UINT64, AVERAGE),
+      COUNTABLE(SX_RB_COLOR_STALL, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_countable mh_countables[] = {
-       COUNTABLE(CP_R0_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(CP_R1_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(CP_R2_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(CP_R3_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(CP_R4_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(CP_TOTAL_READ_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(CP_TOTAL_WRITE_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(CP_TOTAL_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(CP_DATA_BYTES_WRITTEN, UINT64, AVERAGE),
-       COUNTABLE(CP_WRITE_CLEAN_RESPONSES, UINT64, AVERAGE),
-       COUNTABLE(CP_R0_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
-       COUNTABLE(CP_R1_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
-       COUNTABLE(CP_R2_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
-       COUNTABLE(CP_R3_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
-       COUNTABLE(CP_R4_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
-       COUNTABLE(CP_TOTAL_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
-       COUNTABLE(CP_R0_DATA_BEATS_READ, UINT64, AVERAGE),
-       COUNTABLE(CP_R1_DATA_BEATS_READ, UINT64, AVERAGE),
-       COUNTABLE(CP_R2_DATA_BEATS_READ, UINT64, AVERAGE),
-       COUNTABLE(CP_R3_DATA_BEATS_READ, UINT64, AVERAGE),
-       COUNTABLE(CP_R4_DATA_BEATS_READ, UINT64, AVERAGE),
-       COUNTABLE(CP_TOTAL_DATA_BEATS_READ, UINT64, AVERAGE),
-       COUNTABLE(VGT_R0_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(VGT_R1_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(VGT_TOTAL_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(VGT_R0_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
-       COUNTABLE(VGT_R1_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
-       COUNTABLE(VGT_TOTAL_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
-       COUNTABLE(VGT_R0_DATA_BEATS_READ, UINT64, AVERAGE),
-       COUNTABLE(VGT_R1_DATA_BEATS_READ, UINT64, AVERAGE),
-       COUNTABLE(VGT_TOTAL_DATA_BEATS_READ, UINT64, AVERAGE),
-       COUNTABLE(TC_TOTAL_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(TC_ROQ_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(TC_INFO_SENT, UINT64, AVERAGE),
-       COUNTABLE(TC_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
-       COUNTABLE(TC_DATA_BEATS_READ, UINT64, AVERAGE),
-       COUNTABLE(TCD_BURSTS_READ, UINT64, AVERAGE),
-       COUNTABLE(RB_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(RB_DATA_BYTES_WRITTEN, UINT64, AVERAGE),
-       COUNTABLE(RB_WRITE_CLEAN_RESPONSES, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_0, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_1, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_2, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_3, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_4, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_5, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_6, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_7, UINT64, AVERAGE),
-       COUNTABLE(AXI_TOTAL_READ_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_0, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_1, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_2, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_3, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_4, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_5, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_6, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_7, UINT64, AVERAGE),
-       COUNTABLE(AXI_TOTAL_WRITE_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(AXI_TOTAL_REQUESTS_ID_0, UINT64, AVERAGE),
-       COUNTABLE(AXI_TOTAL_REQUESTS_ID_1, UINT64, AVERAGE),
-       COUNTABLE(AXI_TOTAL_REQUESTS_ID_2, UINT64, AVERAGE),
-       COUNTABLE(AXI_TOTAL_REQUESTS_ID_3, UINT64, AVERAGE),
-       COUNTABLE(AXI_TOTAL_REQUESTS_ID_4, UINT64, AVERAGE),
-       COUNTABLE(AXI_TOTAL_REQUESTS_ID_5, UINT64, AVERAGE),
-       COUNTABLE(AXI_TOTAL_REQUESTS_ID_6, UINT64, AVERAGE),
-       COUNTABLE(AXI_TOTAL_REQUESTS_ID_7, UINT64, AVERAGE),
-       COUNTABLE(AXI_TOTAL_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_CHANNEL_BURSTS_ID_0, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_CHANNEL_BURSTS_ID_1, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_CHANNEL_BURSTS_ID_2, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_CHANNEL_BURSTS_ID_3, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_CHANNEL_BURSTS_ID_4, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_CHANNEL_BURSTS_ID_5, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_CHANNEL_BURSTS_ID_6, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_CHANNEL_BURSTS_ID_7, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_CHANNEL_TOTAL_BURSTS, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_CHANNEL_DATA_BEATS_READ_ID_0, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_CHANNEL_DATA_BEATS_READ_ID_1, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_CHANNEL_DATA_BEATS_READ_ID_2, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_CHANNEL_DATA_BEATS_READ_ID_3, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_CHANNEL_DATA_BEATS_READ_ID_4, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_CHANNEL_DATA_BEATS_READ_ID_5, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_CHANNEL_DATA_BEATS_READ_ID_6, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_CHANNEL_DATA_BEATS_READ_ID_7, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_CHANNEL_TOTAL_DATA_BEATS_READ, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_CHANNEL_BURSTS_ID_0, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_CHANNEL_BURSTS_ID_1, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_CHANNEL_BURSTS_ID_2, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_CHANNEL_BURSTS_ID_3, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_CHANNEL_BURSTS_ID_4, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_CHANNEL_BURSTS_ID_5, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_CHANNEL_BURSTS_ID_6, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_CHANNEL_BURSTS_ID_7, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_CHANNEL_TOTAL_BURSTS, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_CHANNEL_DATA_BYTES_WRITTEN_ID_0, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_CHANNEL_DATA_BYTES_WRITTEN_ID_1, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_CHANNEL_DATA_BYTES_WRITTEN_ID_2, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_CHANNEL_DATA_BYTES_WRITTEN_ID_3, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_CHANNEL_DATA_BYTES_WRITTEN_ID_4, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_CHANNEL_DATA_BYTES_WRITTEN_ID_5, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_CHANNEL_DATA_BYTES_WRITTEN_ID_6, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_CHANNEL_DATA_BYTES_WRITTEN_ID_7, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_CHANNEL_TOTAL_DATA_BYTES_WRITTEN, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_RESPONSE_CHANNEL_RESPONSES_ID_0, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_RESPONSE_CHANNEL_RESPONSES_ID_1, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_RESPONSE_CHANNEL_RESPONSES_ID_2, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_RESPONSE_CHANNEL_RESPONSES_ID_3, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_RESPONSE_CHANNEL_RESPONSES_ID_4, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_RESPONSE_CHANNEL_RESPONSES_ID_5, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_RESPONSE_CHANNEL_RESPONSES_ID_6, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_RESPONSE_CHANNEL_RESPONSES_ID_7, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_RESPONSE_CHANNEL_TOTAL_RESPONSES, UINT64, AVERAGE),
-       COUNTABLE(TOTAL_MMU_MISSES, UINT64, AVERAGE),
-       COUNTABLE(MMU_READ_MISSES, UINT64, AVERAGE),
-       COUNTABLE(MMU_WRITE_MISSES, UINT64, AVERAGE),
-       COUNTABLE(TOTAL_MMU_HITS, UINT64, AVERAGE),
-       COUNTABLE(MMU_READ_HITS, UINT64, AVERAGE),
-       COUNTABLE(MMU_WRITE_HITS, UINT64, AVERAGE),
-       COUNTABLE(SPLIT_MODE_TC_HITS, UINT64, AVERAGE),
-       COUNTABLE(SPLIT_MODE_TC_MISSES, UINT64, AVERAGE),
-       COUNTABLE(SPLIT_MODE_NON_TC_HITS, UINT64, AVERAGE),
-       COUNTABLE(SPLIT_MODE_NON_TC_MISSES, UINT64, AVERAGE),
-       COUNTABLE(STALL_AWAITING_TLB_MISS_FETCH, UINT64, AVERAGE),
-       COUNTABLE(MMU_TLB_MISS_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
-       COUNTABLE(MMU_TLB_MISS_DATA_BEATS_READ, UINT64, AVERAGE),
-       COUNTABLE(CP_CYCLES_HELD_OFF, UINT64, AVERAGE),
-       COUNTABLE(VGT_CYCLES_HELD_OFF, UINT64, AVERAGE),
-       COUNTABLE(TC_CYCLES_HELD_OFF, UINT64, AVERAGE),
-       COUNTABLE(TC_ROQ_CYCLES_HELD_OFF, UINT64, AVERAGE),
-       COUNTABLE(TC_CYCLES_HELD_OFF_TCD_FULL, UINT64, AVERAGE),
-       COUNTABLE(RB_CYCLES_HELD_OFF, UINT64, AVERAGE),
-       COUNTABLE(TOTAL_CYCLES_ANY_CLNT_HELD_OFF, UINT64, AVERAGE),
-       COUNTABLE(TLB_MISS_CYCLES_HELD_OFF, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUEST_HELD_OFF, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUEST_HELD_OFF, UINT64, AVERAGE),
-       COUNTABLE(AXI_REQUEST_HELD_OFF, UINT64, AVERAGE),
-       COUNTABLE(AXI_REQUEST_HELD_OFF_INFLIGHT_LIMIT, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_DATA_HELD_OFF, UINT64, AVERAGE),
-       COUNTABLE(CP_SAME_PAGE_BANK_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(VGT_SAME_PAGE_BANK_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(TC_SAME_PAGE_BANK_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(TC_ARB_HOLD_SAME_PAGE_BANK_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(RB_SAME_PAGE_BANK_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(TOTAL_SAME_PAGE_BANK_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(CP_SAME_PAGE_BANK_REQUESTS_KILLED_FAIRNESS_LIMIT, UINT64, AVERAGE),
-       COUNTABLE(VGT_SAME_PAGE_BANK_REQUESTS_KILLED_FAIRNESS_LIMIT, UINT64, AVERAGE),
-       COUNTABLE(TC_SAME_PAGE_BANK_REQUESTS_KILLED_FAIRNESS_LIMIT, UINT64, AVERAGE),
-       COUNTABLE(RB_SAME_PAGE_BANK_REQUESTS_KILLED_FAIRNESS_LIMIT, UINT64, AVERAGE),
-       COUNTABLE(TOTAL_SAME_PAGE_BANK_KILLED_FAIRNESS_LIMIT, UINT64, AVERAGE),
-       COUNTABLE(TOTAL_MH_READ_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(TOTAL_MH_WRITE_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(TOTAL_MH_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(MH_BUSY, UINT64, AVERAGE),
-       COUNTABLE(CP_NTH_ACCESS_SAME_PAGE_BANK_SEQUENCE, UINT64, AVERAGE),
-       COUNTABLE(VGT_NTH_ACCESS_SAME_PAGE_BANK_SEQUENCE, UINT64, AVERAGE),
-       COUNTABLE(TC_NTH_ACCESS_SAME_PAGE_BANK_SEQUENCE, UINT64, AVERAGE),
-       COUNTABLE(RB_NTH_ACCESS_SAME_PAGE_BANK_SEQUENCE, UINT64, AVERAGE),
-       COUNTABLE(TC_ROQ_N_VALID_ENTRIES, UINT64, AVERAGE),
-       COUNTABLE(ARQ_N_ENTRIES, UINT64, AVERAGE),
-       COUNTABLE(WDB_N_ENTRIES, UINT64, AVERAGE),
-       COUNTABLE(MH_READ_LATENCY_OUTST_REQ_SUM, UINT64, AVERAGE),
-       COUNTABLE(MC_READ_LATENCY_OUTST_REQ_SUM, UINT64, AVERAGE),
-       COUNTABLE(MC_TOTAL_READ_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(ELAPSED_CYCLES_MH_GATED_CLK, UINT64, AVERAGE),
-       COUNTABLE(ELAPSED_CLK_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(CP_W_16B_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(CP_W_32B_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(TC_16B_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(TC_32B_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(PA_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(PA_DATA_BYTES_WRITTEN, UINT64, AVERAGE),
-       COUNTABLE(PA_WRITE_CLEAN_RESPONSES, UINT64, AVERAGE),
-       COUNTABLE(PA_CYCLES_HELD_OFF, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUEST_DATA_BEATS_ID_0, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUEST_DATA_BEATS_ID_1, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUEST_DATA_BEATS_ID_2, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUEST_DATA_BEATS_ID_3, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUEST_DATA_BEATS_ID_4, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUEST_DATA_BEATS_ID_5, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUEST_DATA_BEATS_ID_6, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUEST_DATA_BEATS_ID_7, UINT64, AVERAGE),
-       COUNTABLE(AXI_TOTAL_READ_REQUEST_DATA_BEATS, UINT64, AVERAGE),
+      COUNTABLE(CP_R0_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(CP_R1_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(CP_R2_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(CP_R3_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(CP_R4_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(CP_TOTAL_READ_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(CP_TOTAL_WRITE_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(CP_TOTAL_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(CP_DATA_BYTES_WRITTEN, UINT64, AVERAGE),
+      COUNTABLE(CP_WRITE_CLEAN_RESPONSES, UINT64, AVERAGE),
+      COUNTABLE(CP_R0_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
+      COUNTABLE(CP_R1_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
+      COUNTABLE(CP_R2_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
+      COUNTABLE(CP_R3_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
+      COUNTABLE(CP_R4_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
+      COUNTABLE(CP_TOTAL_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
+      COUNTABLE(CP_R0_DATA_BEATS_READ, UINT64, AVERAGE),
+      COUNTABLE(CP_R1_DATA_BEATS_READ, UINT64, AVERAGE),
+      COUNTABLE(CP_R2_DATA_BEATS_READ, UINT64, AVERAGE),
+      COUNTABLE(CP_R3_DATA_BEATS_READ, UINT64, AVERAGE),
+      COUNTABLE(CP_R4_DATA_BEATS_READ, UINT64, AVERAGE),
+      COUNTABLE(CP_TOTAL_DATA_BEATS_READ, UINT64, AVERAGE),
+      COUNTABLE(VGT_R0_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(VGT_R1_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(VGT_TOTAL_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(VGT_R0_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
+      COUNTABLE(VGT_R1_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
+      COUNTABLE(VGT_TOTAL_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
+      COUNTABLE(VGT_R0_DATA_BEATS_READ, UINT64, AVERAGE),
+      COUNTABLE(VGT_R1_DATA_BEATS_READ, UINT64, AVERAGE),
+      COUNTABLE(VGT_TOTAL_DATA_BEATS_READ, UINT64, AVERAGE),
+      COUNTABLE(TC_TOTAL_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(TC_ROQ_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(TC_INFO_SENT, UINT64, AVERAGE),
+      COUNTABLE(TC_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
+      COUNTABLE(TC_DATA_BEATS_READ, UINT64, AVERAGE),
+      COUNTABLE(TCD_BURSTS_READ, UINT64, AVERAGE),
+      COUNTABLE(RB_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(RB_DATA_BYTES_WRITTEN, UINT64, AVERAGE),
+      COUNTABLE(RB_WRITE_CLEAN_RESPONSES, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_0, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_1, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_2, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_3, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_4, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_5, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_6, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_7, UINT64, AVERAGE),
+      COUNTABLE(AXI_TOTAL_READ_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_0, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_1, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_2, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_3, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_4, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_5, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_6, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_7, UINT64, AVERAGE),
+      COUNTABLE(AXI_TOTAL_WRITE_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(AXI_TOTAL_REQUESTS_ID_0, UINT64, AVERAGE),
+      COUNTABLE(AXI_TOTAL_REQUESTS_ID_1, UINT64, AVERAGE),
+      COUNTABLE(AXI_TOTAL_REQUESTS_ID_2, UINT64, AVERAGE),
+      COUNTABLE(AXI_TOTAL_REQUESTS_ID_3, UINT64, AVERAGE),
+      COUNTABLE(AXI_TOTAL_REQUESTS_ID_4, UINT64, AVERAGE),
+      COUNTABLE(AXI_TOTAL_REQUESTS_ID_5, UINT64, AVERAGE),
+      COUNTABLE(AXI_TOTAL_REQUESTS_ID_6, UINT64, AVERAGE),
+      COUNTABLE(AXI_TOTAL_REQUESTS_ID_7, UINT64, AVERAGE),
+      COUNTABLE(AXI_TOTAL_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_CHANNEL_BURSTS_ID_0, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_CHANNEL_BURSTS_ID_1, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_CHANNEL_BURSTS_ID_2, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_CHANNEL_BURSTS_ID_3, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_CHANNEL_BURSTS_ID_4, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_CHANNEL_BURSTS_ID_5, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_CHANNEL_BURSTS_ID_6, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_CHANNEL_BURSTS_ID_7, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_CHANNEL_TOTAL_BURSTS, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_CHANNEL_DATA_BEATS_READ_ID_0, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_CHANNEL_DATA_BEATS_READ_ID_1, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_CHANNEL_DATA_BEATS_READ_ID_2, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_CHANNEL_DATA_BEATS_READ_ID_3, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_CHANNEL_DATA_BEATS_READ_ID_4, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_CHANNEL_DATA_BEATS_READ_ID_5, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_CHANNEL_DATA_BEATS_READ_ID_6, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_CHANNEL_DATA_BEATS_READ_ID_7, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_CHANNEL_TOTAL_DATA_BEATS_READ, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_CHANNEL_BURSTS_ID_0, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_CHANNEL_BURSTS_ID_1, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_CHANNEL_BURSTS_ID_2, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_CHANNEL_BURSTS_ID_3, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_CHANNEL_BURSTS_ID_4, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_CHANNEL_BURSTS_ID_5, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_CHANNEL_BURSTS_ID_6, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_CHANNEL_BURSTS_ID_7, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_CHANNEL_TOTAL_BURSTS, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_CHANNEL_DATA_BYTES_WRITTEN_ID_0, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_CHANNEL_DATA_BYTES_WRITTEN_ID_1, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_CHANNEL_DATA_BYTES_WRITTEN_ID_2, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_CHANNEL_DATA_BYTES_WRITTEN_ID_3, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_CHANNEL_DATA_BYTES_WRITTEN_ID_4, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_CHANNEL_DATA_BYTES_WRITTEN_ID_5, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_CHANNEL_DATA_BYTES_WRITTEN_ID_6, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_CHANNEL_DATA_BYTES_WRITTEN_ID_7, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_CHANNEL_TOTAL_DATA_BYTES_WRITTEN, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_RESPONSE_CHANNEL_RESPONSES_ID_0, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_RESPONSE_CHANNEL_RESPONSES_ID_1, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_RESPONSE_CHANNEL_RESPONSES_ID_2, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_RESPONSE_CHANNEL_RESPONSES_ID_3, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_RESPONSE_CHANNEL_RESPONSES_ID_4, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_RESPONSE_CHANNEL_RESPONSES_ID_5, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_RESPONSE_CHANNEL_RESPONSES_ID_6, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_RESPONSE_CHANNEL_RESPONSES_ID_7, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_RESPONSE_CHANNEL_TOTAL_RESPONSES, UINT64, AVERAGE),
+      COUNTABLE(TOTAL_MMU_MISSES, UINT64, AVERAGE),
+      COUNTABLE(MMU_READ_MISSES, UINT64, AVERAGE),
+      COUNTABLE(MMU_WRITE_MISSES, UINT64, AVERAGE),
+      COUNTABLE(TOTAL_MMU_HITS, UINT64, AVERAGE),
+      COUNTABLE(MMU_READ_HITS, UINT64, AVERAGE),
+      COUNTABLE(MMU_WRITE_HITS, UINT64, AVERAGE),
+      COUNTABLE(SPLIT_MODE_TC_HITS, UINT64, AVERAGE),
+      COUNTABLE(SPLIT_MODE_TC_MISSES, UINT64, AVERAGE),
+      COUNTABLE(SPLIT_MODE_NON_TC_HITS, UINT64, AVERAGE),
+      COUNTABLE(SPLIT_MODE_NON_TC_MISSES, UINT64, AVERAGE),
+      COUNTABLE(STALL_AWAITING_TLB_MISS_FETCH, UINT64, AVERAGE),
+      COUNTABLE(MMU_TLB_MISS_READ_BURSTS_RECEIVED, UINT64, AVERAGE),
+      COUNTABLE(MMU_TLB_MISS_DATA_BEATS_READ, UINT64, AVERAGE),
+      COUNTABLE(CP_CYCLES_HELD_OFF, UINT64, AVERAGE),
+      COUNTABLE(VGT_CYCLES_HELD_OFF, UINT64, AVERAGE),
+      COUNTABLE(TC_CYCLES_HELD_OFF, UINT64, AVERAGE),
+      COUNTABLE(TC_ROQ_CYCLES_HELD_OFF, UINT64, AVERAGE),
+      COUNTABLE(TC_CYCLES_HELD_OFF_TCD_FULL, UINT64, AVERAGE),
+      COUNTABLE(RB_CYCLES_HELD_OFF, UINT64, AVERAGE),
+      COUNTABLE(TOTAL_CYCLES_ANY_CLNT_HELD_OFF, UINT64, AVERAGE),
+      COUNTABLE(TLB_MISS_CYCLES_HELD_OFF, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUEST_HELD_OFF, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUEST_HELD_OFF, UINT64, AVERAGE),
+      COUNTABLE(AXI_REQUEST_HELD_OFF, UINT64, AVERAGE),
+      COUNTABLE(AXI_REQUEST_HELD_OFF_INFLIGHT_LIMIT, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_DATA_HELD_OFF, UINT64, AVERAGE),
+      COUNTABLE(CP_SAME_PAGE_BANK_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(VGT_SAME_PAGE_BANK_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(TC_SAME_PAGE_BANK_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(TC_ARB_HOLD_SAME_PAGE_BANK_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(RB_SAME_PAGE_BANK_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(TOTAL_SAME_PAGE_BANK_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(CP_SAME_PAGE_BANK_REQUESTS_KILLED_FAIRNESS_LIMIT, UINT64, AVERAGE),
+      COUNTABLE(VGT_SAME_PAGE_BANK_REQUESTS_KILLED_FAIRNESS_LIMIT, UINT64, AVERAGE),
+      COUNTABLE(TC_SAME_PAGE_BANK_REQUESTS_KILLED_FAIRNESS_LIMIT, UINT64, AVERAGE),
+      COUNTABLE(RB_SAME_PAGE_BANK_REQUESTS_KILLED_FAIRNESS_LIMIT, UINT64, AVERAGE),
+      COUNTABLE(TOTAL_SAME_PAGE_BANK_KILLED_FAIRNESS_LIMIT, UINT64, AVERAGE),
+      COUNTABLE(TOTAL_MH_READ_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(TOTAL_MH_WRITE_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(TOTAL_MH_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(MH_BUSY, UINT64, AVERAGE),
+      COUNTABLE(CP_NTH_ACCESS_SAME_PAGE_BANK_SEQUENCE, UINT64, AVERAGE),
+      COUNTABLE(VGT_NTH_ACCESS_SAME_PAGE_BANK_SEQUENCE, UINT64, AVERAGE),
+      COUNTABLE(TC_NTH_ACCESS_SAME_PAGE_BANK_SEQUENCE, UINT64, AVERAGE),
+      COUNTABLE(RB_NTH_ACCESS_SAME_PAGE_BANK_SEQUENCE, UINT64, AVERAGE),
+      COUNTABLE(TC_ROQ_N_VALID_ENTRIES, UINT64, AVERAGE),
+      COUNTABLE(ARQ_N_ENTRIES, UINT64, AVERAGE),
+      COUNTABLE(WDB_N_ENTRIES, UINT64, AVERAGE),
+      COUNTABLE(MH_READ_LATENCY_OUTST_REQ_SUM, UINT64, AVERAGE),
+      COUNTABLE(MC_READ_LATENCY_OUTST_REQ_SUM, UINT64, AVERAGE),
+      COUNTABLE(MC_TOTAL_READ_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(ELAPSED_CYCLES_MH_GATED_CLK, UINT64, AVERAGE),
+      COUNTABLE(ELAPSED_CLK_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(CP_W_16B_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(CP_W_32B_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(TC_16B_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(TC_32B_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(PA_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(PA_DATA_BYTES_WRITTEN, UINT64, AVERAGE),
+      COUNTABLE(PA_WRITE_CLEAN_RESPONSES, UINT64, AVERAGE),
+      COUNTABLE(PA_CYCLES_HELD_OFF, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUEST_DATA_BEATS_ID_0, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUEST_DATA_BEATS_ID_1, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUEST_DATA_BEATS_ID_2, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUEST_DATA_BEATS_ID_3, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUEST_DATA_BEATS_ID_4, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUEST_DATA_BEATS_ID_5, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUEST_DATA_BEATS_ID_6, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUEST_DATA_BEATS_ID_7, UINT64, AVERAGE),
+      COUNTABLE(AXI_TOTAL_READ_REQUEST_DATA_BEATS, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_countable rb_countables[] = {
-       COUNTABLE(RBPERF_CNTX_BUSY, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_CNTX_BUSY_MAX, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_SX_QUAD_STARVED, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_SX_QUAD_STARVED_MAX, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_GA_GC_CH0_SYS_REQ, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_GA_GC_CH0_SYS_REQ_MAX, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_GA_GC_CH1_SYS_REQ, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_GA_GC_CH1_SYS_REQ_MAX, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_MH_STARVED, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_MH_STARVED_MAX, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_AZ_BC_COLOR_BUSY, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_AZ_BC_COLOR_BUSY_MAX, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_AZ_BC_Z_BUSY, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_AZ_BC_Z_BUSY_MAX, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_RB_SC_TILE_RTR_N, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_RB_SC_TILE_RTR_N_MAX, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_RB_SC_SAMP_RTR_N, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_RB_SC_SAMP_RTR_N_MAX, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_RB_SX_QUAD_RTR_N, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_RB_SX_QUAD_RTR_N_MAX, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_RB_SX_COLOR_RTR_N, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_RB_SX_COLOR_RTR_N_MAX, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_RB_SC_SAMP_LZ_BUSY, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_RB_SC_SAMP_LZ_BUSY_MAX, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_ZXP_STALL, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_ZXP_STALL_MAX, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_EVENT_PENDING, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_EVENT_PENDING_MAX, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_RB_MH_VALID, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_RB_MH_VALID_MAX, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_SX_RB_QUAD_SEND, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_SX_RB_COLOR_SEND, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_SC_RB_TILE_SEND, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_SC_RB_SAMPLE_SEND, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_SX_RB_MEM_EXPORT, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_SX_RB_QUAD_EVENT, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_SC_RB_TILE_EVENT_FILTERED, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_SC_RB_TILE_EVENT_ALL, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_RB_SC_EZ_SEND, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_RB_SX_INDEX_SEND, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_GMEM_INTFO_RD, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_GMEM_INTF1_RD, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_GMEM_INTFO_WR, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_GMEM_INTF1_WR, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_RB_CP_CONTEXT_DONE, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_RB_CP_CACHE_FLUSH, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_ZPASS_DONE, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_ZCMD_VALID, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_CCMD_VALID, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_ACCUM_GRANT, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_ACCUM_C0_GRANT, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_ACCUM_C1_GRANT, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_ACCUM_FULL_BE_WR, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_ACCUM_REQUEST_NO_GRANT, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_ACCUM_TIMEOUT_PULSE, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_ACCUM_LIN_TIMEOUT_PULSE, UINT64, AVERAGE),
-       COUNTABLE(RBPERF_ACCUM_CAM_HIT_FLUSHING, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_CNTX_BUSY, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_CNTX_BUSY_MAX, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_SX_QUAD_STARVED, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_SX_QUAD_STARVED_MAX, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_GA_GC_CH0_SYS_REQ, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_GA_GC_CH0_SYS_REQ_MAX, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_GA_GC_CH1_SYS_REQ, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_GA_GC_CH1_SYS_REQ_MAX, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_MH_STARVED, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_MH_STARVED_MAX, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_AZ_BC_COLOR_BUSY, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_AZ_BC_COLOR_BUSY_MAX, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_AZ_BC_Z_BUSY, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_AZ_BC_Z_BUSY_MAX, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_RB_SC_TILE_RTR_N, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_RB_SC_TILE_RTR_N_MAX, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_RB_SC_SAMP_RTR_N, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_RB_SC_SAMP_RTR_N_MAX, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_RB_SX_QUAD_RTR_N, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_RB_SX_QUAD_RTR_N_MAX, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_RB_SX_COLOR_RTR_N, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_RB_SX_COLOR_RTR_N_MAX, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_RB_SC_SAMP_LZ_BUSY, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_RB_SC_SAMP_LZ_BUSY_MAX, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_ZXP_STALL, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_ZXP_STALL_MAX, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_EVENT_PENDING, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_EVENT_PENDING_MAX, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_RB_MH_VALID, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_RB_MH_VALID_MAX, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_SX_RB_QUAD_SEND, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_SX_RB_COLOR_SEND, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_SC_RB_TILE_SEND, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_SC_RB_SAMPLE_SEND, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_SX_RB_MEM_EXPORT, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_SX_RB_QUAD_EVENT, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_SC_RB_TILE_EVENT_FILTERED, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_SC_RB_TILE_EVENT_ALL, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_RB_SC_EZ_SEND, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_RB_SX_INDEX_SEND, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_GMEM_INTFO_RD, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_GMEM_INTF1_RD, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_GMEM_INTFO_WR, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_GMEM_INTF1_WR, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_RB_CP_CONTEXT_DONE, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_RB_CP_CACHE_FLUSH, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_ZPASS_DONE, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_ZCMD_VALID, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_CCMD_VALID, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_ACCUM_GRANT, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_ACCUM_C0_GRANT, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_ACCUM_C1_GRANT, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_ACCUM_FULL_BE_WR, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_ACCUM_REQUEST_NO_GRANT, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_ACCUM_TIMEOUT_PULSE, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_ACCUM_LIN_TIMEOUT_PULSE, UINT64, AVERAGE),
+      COUNTABLE(RBPERF_ACCUM_CAM_HIT_FLUSHING, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter pa_su_counters[] = {
-       COUNTER(PA_SU_PERFCOUNTER0_SELECT, PA_SU_PERFCOUNTER0_LOW, PA_SU_PERFCOUNTER0_HI),
-       COUNTER(PA_SU_PERFCOUNTER1_SELECT, PA_SU_PERFCOUNTER1_LOW, PA_SU_PERFCOUNTER1_HI),
-       COUNTER(PA_SU_PERFCOUNTER2_SELECT, PA_SU_PERFCOUNTER2_LOW, PA_SU_PERFCOUNTER2_HI),
-       COUNTER(PA_SU_PERFCOUNTER3_SELECT, PA_SU_PERFCOUNTER3_LOW, PA_SU_PERFCOUNTER3_HI),
+      COUNTER(PA_SU_PERFCOUNTER0_SELECT, PA_SU_PERFCOUNTER0_LOW, PA_SU_PERFCOUNTER0_HI),
+      COUNTER(PA_SU_PERFCOUNTER1_SELECT, PA_SU_PERFCOUNTER1_LOW, PA_SU_PERFCOUNTER1_HI),
+      COUNTER(PA_SU_PERFCOUNTER2_SELECT, PA_SU_PERFCOUNTER2_LOW, PA_SU_PERFCOUNTER2_HI),
+      COUNTER(PA_SU_PERFCOUNTER3_SELECT, PA_SU_PERFCOUNTER3_LOW, PA_SU_PERFCOUNTER3_HI),
 };
 
 static const struct fd_perfcntr_counter pa_sc_counters[] = {
-       COUNTER(PA_SC_PERFCOUNTER0_SELECT, PA_SC_PERFCOUNTER0_LOW, PA_SC_PERFCOUNTER0_HI),
+      COUNTER(PA_SC_PERFCOUNTER0_SELECT, PA_SC_PERFCOUNTER0_LOW, PA_SC_PERFCOUNTER0_HI),
 };
 
 static const struct fd_perfcntr_counter vgt_counters[] = {
-       COUNTER(VGT_PERFCOUNTER0_SELECT, VGT_PERFCOUNTER0_LOW, VGT_PERFCOUNTER0_HI),
-       COUNTER(VGT_PERFCOUNTER1_SELECT, VGT_PERFCOUNTER1_LOW, VGT_PERFCOUNTER1_HI),
-       COUNTER(VGT_PERFCOUNTER2_SELECT, VGT_PERFCOUNTER2_LOW, VGT_PERFCOUNTER2_HI),
-       COUNTER(VGT_PERFCOUNTER3_SELECT, VGT_PERFCOUNTER3_LOW, VGT_PERFCOUNTER3_HI),
+      COUNTER(VGT_PERFCOUNTER0_SELECT, VGT_PERFCOUNTER0_LOW, VGT_PERFCOUNTER0_HI),
+      COUNTER(VGT_PERFCOUNTER1_SELECT, VGT_PERFCOUNTER1_LOW, VGT_PERFCOUNTER1_HI),
+      COUNTER(VGT_PERFCOUNTER2_SELECT, VGT_PERFCOUNTER2_LOW, VGT_PERFCOUNTER2_HI),
+      COUNTER(VGT_PERFCOUNTER3_SELECT, VGT_PERFCOUNTER3_LOW, VGT_PERFCOUNTER3_HI),
 };
 
 static const struct fd_perfcntr_counter tcr_counters[] = {
-       COUNTER(TCR_PERFCOUNTER0_SELECT, TCR_PERFCOUNTER0_LOW, TCR_PERFCOUNTER0_HI),
-       COUNTER(TCR_PERFCOUNTER1_SELECT, TCR_PERFCOUNTER1_LOW, TCR_PERFCOUNTER1_HI),
+      COUNTER(TCR_PERFCOUNTER0_SELECT, TCR_PERFCOUNTER0_LOW, TCR_PERFCOUNTER0_HI),
+      COUNTER(TCR_PERFCOUNTER1_SELECT, TCR_PERFCOUNTER1_LOW, TCR_PERFCOUNTER1_HI),
 };
 
 static const struct fd_perfcntr_counter tp0_counters[] = {
-       COUNTER(TP0_PERFCOUNTER0_SELECT, TP0_PERFCOUNTER0_LOW, TP0_PERFCOUNTER0_HI),
-       COUNTER(TP0_PERFCOUNTER1_SELECT, TP0_PERFCOUNTER1_LOW, TP0_PERFCOUNTER1_HI),
+      COUNTER(TP0_PERFCOUNTER0_SELECT, TP0_PERFCOUNTER0_LOW, TP0_PERFCOUNTER0_HI),
+      COUNTER(TP0_PERFCOUNTER1_SELECT, TP0_PERFCOUNTER1_LOW, TP0_PERFCOUNTER1_HI),
 };
 
 static const struct fd_perfcntr_counter tcm_counters[] = {
-       COUNTER(TCM_PERFCOUNTER0_SELECT, TCM_PERFCOUNTER0_LOW, TCM_PERFCOUNTER0_HI),
-       COUNTER(TCM_PERFCOUNTER1_SELECT, TCM_PERFCOUNTER1_LOW, TCM_PERFCOUNTER1_HI),
+      COUNTER(TCM_PERFCOUNTER0_SELECT, TCM_PERFCOUNTER0_LOW, TCM_PERFCOUNTER0_HI),
+      COUNTER(TCM_PERFCOUNTER1_SELECT, TCM_PERFCOUNTER1_LOW, TCM_PERFCOUNTER1_HI),
 };
 
 static const struct fd_perfcntr_counter tcf_counters[] = {
-       COUNTER(TCF_PERFCOUNTER0_SELECT, TCF_PERFCOUNTER0_LOW, TCF_PERFCOUNTER0_HI),
-       COUNTER(TCF_PERFCOUNTER1_SELECT, TCF_PERFCOUNTER1_LOW, TCF_PERFCOUNTER1_HI),
-       COUNTER(TCF_PERFCOUNTER2_SELECT, TCF_PERFCOUNTER2_LOW, TCF_PERFCOUNTER2_HI),
-       COUNTER(TCF_PERFCOUNTER3_SELECT, TCF_PERFCOUNTER3_LOW, TCF_PERFCOUNTER3_HI),
-       COUNTER(TCF_PERFCOUNTER4_SELECT, TCF_PERFCOUNTER4_LOW, TCF_PERFCOUNTER4_HI),
-       COUNTER(TCF_PERFCOUNTER5_SELECT, TCF_PERFCOUNTER5_LOW, TCF_PERFCOUNTER5_HI),
-       COUNTER(TCF_PERFCOUNTER6_SELECT, TCF_PERFCOUNTER6_LOW, TCF_PERFCOUNTER6_HI),
-       COUNTER(TCF_PERFCOUNTER7_SELECT, TCF_PERFCOUNTER7_LOW, TCF_PERFCOUNTER7_HI),
-       COUNTER(TCF_PERFCOUNTER8_SELECT, TCF_PERFCOUNTER8_LOW, TCF_PERFCOUNTER8_HI),
-       COUNTER(TCF_PERFCOUNTER9_SELECT, TCF_PERFCOUNTER9_LOW, TCF_PERFCOUNTER9_HI),
-       COUNTER(TCF_PERFCOUNTER10_SELECT, TCF_PERFCOUNTER10_LOW, TCF_PERFCOUNTER10_HI),
-       COUNTER(TCF_PERFCOUNTER11_SELECT, TCF_PERFCOUNTER11_LOW, TCF_PERFCOUNTER11_HI),
+      COUNTER(TCF_PERFCOUNTER0_SELECT, TCF_PERFCOUNTER0_LOW, TCF_PERFCOUNTER0_HI),
+      COUNTER(TCF_PERFCOUNTER1_SELECT, TCF_PERFCOUNTER1_LOW, TCF_PERFCOUNTER1_HI),
+      COUNTER(TCF_PERFCOUNTER2_SELECT, TCF_PERFCOUNTER2_LOW, TCF_PERFCOUNTER2_HI),
+      COUNTER(TCF_PERFCOUNTER3_SELECT, TCF_PERFCOUNTER3_LOW, TCF_PERFCOUNTER3_HI),
+      COUNTER(TCF_PERFCOUNTER4_SELECT, TCF_PERFCOUNTER4_LOW, TCF_PERFCOUNTER4_HI),
+      COUNTER(TCF_PERFCOUNTER5_SELECT, TCF_PERFCOUNTER5_LOW, TCF_PERFCOUNTER5_HI),
+      COUNTER(TCF_PERFCOUNTER6_SELECT, TCF_PERFCOUNTER6_LOW, TCF_PERFCOUNTER6_HI),
+      COUNTER(TCF_PERFCOUNTER7_SELECT, TCF_PERFCOUNTER7_LOW, TCF_PERFCOUNTER7_HI),
+      COUNTER(TCF_PERFCOUNTER8_SELECT, TCF_PERFCOUNTER8_LOW, TCF_PERFCOUNTER8_HI),
+      COUNTER(TCF_PERFCOUNTER9_SELECT, TCF_PERFCOUNTER9_LOW, TCF_PERFCOUNTER9_HI),
+      COUNTER(TCF_PERFCOUNTER10_SELECT, TCF_PERFCOUNTER10_LOW, TCF_PERFCOUNTER10_HI),
+      COUNTER(TCF_PERFCOUNTER11_SELECT, TCF_PERFCOUNTER11_LOW, TCF_PERFCOUNTER11_HI),
 };
 
 static const struct fd_perfcntr_counter sq_counters[] = {
-       COUNTER(SQ_PERFCOUNTER0_SELECT, SQ_PERFCOUNTER0_LOW, SQ_PERFCOUNTER0_HI),
-       COUNTER(SQ_PERFCOUNTER1_SELECT, SQ_PERFCOUNTER1_LOW, SQ_PERFCOUNTER1_HI),
-       COUNTER(SQ_PERFCOUNTER2_SELECT, SQ_PERFCOUNTER2_LOW, SQ_PERFCOUNTER2_HI),
-       COUNTER(SQ_PERFCOUNTER3_SELECT, SQ_PERFCOUNTER3_LOW, SQ_PERFCOUNTER3_HI),
+      COUNTER(SQ_PERFCOUNTER0_SELECT, SQ_PERFCOUNTER0_LOW, SQ_PERFCOUNTER0_HI),
+      COUNTER(SQ_PERFCOUNTER1_SELECT, SQ_PERFCOUNTER1_LOW, SQ_PERFCOUNTER1_HI),
+      COUNTER(SQ_PERFCOUNTER2_SELECT, SQ_PERFCOUNTER2_LOW, SQ_PERFCOUNTER2_HI),
+      COUNTER(SQ_PERFCOUNTER3_SELECT, SQ_PERFCOUNTER3_LOW, SQ_PERFCOUNTER3_HI),
 };
 
 static const struct fd_perfcntr_countable rbbm_countables[] = {
-       COUNTABLE(RBBM1_COUNT, UINT64, AVERAGE),
-       COUNTABLE(RBBM1_NRT_BUSY, UINT64, AVERAGE),
-       COUNTABLE(RBBM1_RB_BUSY, UINT64, AVERAGE),
-       COUNTABLE(RBBM1_SQ_CNTX0_BUSY, UINT64, AVERAGE),
-       COUNTABLE(RBBM1_SQ_CNTX17_BUSY, UINT64, AVERAGE),
-       COUNTABLE(RBBM1_VGT_BUSY, UINT64, AVERAGE),
-       COUNTABLE(RBBM1_VGT_NODMA_BUSY, UINT64, AVERAGE),
-       COUNTABLE(RBBM1_PA_BUSY, UINT64, AVERAGE),
-       COUNTABLE(RBBM1_SC_CNTX_BUSY, UINT64, AVERAGE),
-       COUNTABLE(RBBM1_TPC_BUSY, UINT64, AVERAGE),
-       COUNTABLE(RBBM1_TC_BUSY, UINT64, AVERAGE),
-       COUNTABLE(RBBM1_SX_BUSY, UINT64, AVERAGE),
-       COUNTABLE(RBBM1_CP_COHER_BUSY, UINT64, AVERAGE),
-       COUNTABLE(RBBM1_CP_NRT_BUSY, UINT64, AVERAGE),
-       COUNTABLE(RBBM1_GFX_IDLE_STALL, UINT64, AVERAGE),
-       COUNTABLE(RBBM1_INTERRUPT, UINT64, AVERAGE),
+      COUNTABLE(RBBM1_COUNT, UINT64, AVERAGE),
+      COUNTABLE(RBBM1_NRT_BUSY, UINT64, AVERAGE),
+      COUNTABLE(RBBM1_RB_BUSY, UINT64, AVERAGE),
+      COUNTABLE(RBBM1_SQ_CNTX0_BUSY, UINT64, AVERAGE),
+      COUNTABLE(RBBM1_SQ_CNTX17_BUSY, UINT64, AVERAGE),
+      COUNTABLE(RBBM1_VGT_BUSY, UINT64, AVERAGE),
+      COUNTABLE(RBBM1_VGT_NODMA_BUSY, UINT64, AVERAGE),
+      COUNTABLE(RBBM1_PA_BUSY, UINT64, AVERAGE),
+      COUNTABLE(RBBM1_SC_CNTX_BUSY, UINT64, AVERAGE),
+      COUNTABLE(RBBM1_TPC_BUSY, UINT64, AVERAGE),
+      COUNTABLE(RBBM1_TC_BUSY, UINT64, AVERAGE),
+      COUNTABLE(RBBM1_SX_BUSY, UINT64, AVERAGE),
+      COUNTABLE(RBBM1_CP_COHER_BUSY, UINT64, AVERAGE),
+      COUNTABLE(RBBM1_CP_NRT_BUSY, UINT64, AVERAGE),
+      COUNTABLE(RBBM1_GFX_IDLE_STALL, UINT64, AVERAGE),
+      COUNTABLE(RBBM1_INTERRUPT, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_countable cp_countables[] = {
-       COUNTABLE(ALWAYS_COUNT, UINT64, AVERAGE),
-       COUNTABLE(TRANS_FIFO_FULL, UINT64, AVERAGE),
-       COUNTABLE(TRANS_FIFO_AF, UINT64, AVERAGE),
-       COUNTABLE(RCIU_PFPTRANS_WAIT, UINT64, AVERAGE),
-       COUNTABLE(RCIU_NRTTRANS_WAIT, UINT64, AVERAGE),
-       COUNTABLE(CSF_NRT_READ_WAIT, UINT64, AVERAGE),
-       COUNTABLE(CSF_I1_FIFO_FULL, UINT64, AVERAGE),
-       COUNTABLE(CSF_I2_FIFO_FULL, UINT64, AVERAGE),
-       COUNTABLE(CSF_ST_FIFO_FULL, UINT64, AVERAGE),
-       COUNTABLE(CSF_RING_ROQ_FULL, UINT64, AVERAGE),
-       COUNTABLE(CSF_I1_ROQ_FULL, UINT64, AVERAGE),
-       COUNTABLE(CSF_I2_ROQ_FULL, UINT64, AVERAGE),
-       COUNTABLE(CSF_ST_ROQ_FULL, UINT64, AVERAGE),
-       COUNTABLE(MIU_TAG_MEM_FULL, UINT64, AVERAGE),
-       COUNTABLE(MIU_WRITECLEAN, UINT64, AVERAGE),
-       COUNTABLE(MIU_NRT_WRITE_STALLED, UINT64, AVERAGE),
-       COUNTABLE(MIU_NRT_READ_STALLED, UINT64, AVERAGE),
-       COUNTABLE(ME_WRITE_CONFIRM_FIFO_FULL, UINT64, AVERAGE),
-       COUNTABLE(ME_VS_DEALLOC_FIFO_FULL, UINT64, AVERAGE),
-       COUNTABLE(ME_PS_DEALLOC_FIFO_FULL, UINT64, AVERAGE),
-       COUNTABLE(ME_REGS_VS_EVENT_FIFO_FULL, UINT64, AVERAGE),
-       COUNTABLE(ME_REGS_PS_EVENT_FIFO_FULL, UINT64, AVERAGE),
-       COUNTABLE(ME_REGS_CF_EVENT_FIFO_FULL, UINT64, AVERAGE),
-       COUNTABLE(ME_MICRO_RB_STARVED, UINT64, AVERAGE),
-       COUNTABLE(ME_MICRO_I1_STARVED, UINT64, AVERAGE),
-       COUNTABLE(ME_MICRO_I2_STARVED, UINT64, AVERAGE),
-       COUNTABLE(ME_MICRO_ST_STARVED, UINT64, AVERAGE),
-       COUNTABLE(RCIU_RBBM_DWORD_SENT, UINT64, AVERAGE),
-       COUNTABLE(ME_BUSY_CLOCKS, UINT64, AVERAGE),
-       COUNTABLE(ME_WAIT_CONTEXT_AVAIL, UINT64, AVERAGE),
-       COUNTABLE(PFP_TYPE0_PACKET, UINT64, AVERAGE),
-       COUNTABLE(PFP_TYPE3_PACKET, UINT64, AVERAGE),
-       COUNTABLE(CSF_RB_WPTR_NEQ_RPTR, UINT64, AVERAGE),
-       COUNTABLE(CSF_I1_SIZE_NEQ_ZERO, UINT64, AVERAGE),
-       COUNTABLE(CSF_I2_SIZE_NEQ_ZERO, UINT64, AVERAGE),
-       COUNTABLE(CSF_RBI1I2_FETCHING, UINT64, AVERAGE),
+      COUNTABLE(ALWAYS_COUNT, UINT64, AVERAGE),
+      COUNTABLE(TRANS_FIFO_FULL, UINT64, AVERAGE),
+      COUNTABLE(TRANS_FIFO_AF, UINT64, AVERAGE),
+      COUNTABLE(RCIU_PFPTRANS_WAIT, UINT64, AVERAGE),
+      COUNTABLE(RCIU_NRTTRANS_WAIT, UINT64, AVERAGE),
+      COUNTABLE(CSF_NRT_READ_WAIT, UINT64, AVERAGE),
+      COUNTABLE(CSF_I1_FIFO_FULL, UINT64, AVERAGE),
+      COUNTABLE(CSF_I2_FIFO_FULL, UINT64, AVERAGE),
+      COUNTABLE(CSF_ST_FIFO_FULL, UINT64, AVERAGE),
+      COUNTABLE(CSF_RING_ROQ_FULL, UINT64, AVERAGE),
+      COUNTABLE(CSF_I1_ROQ_FULL, UINT64, AVERAGE),
+      COUNTABLE(CSF_I2_ROQ_FULL, UINT64, AVERAGE),
+      COUNTABLE(CSF_ST_ROQ_FULL, UINT64, AVERAGE),
+      COUNTABLE(MIU_TAG_MEM_FULL, UINT64, AVERAGE),
+      COUNTABLE(MIU_WRITECLEAN, UINT64, AVERAGE),
+      COUNTABLE(MIU_NRT_WRITE_STALLED, UINT64, AVERAGE),
+      COUNTABLE(MIU_NRT_READ_STALLED, UINT64, AVERAGE),
+      COUNTABLE(ME_WRITE_CONFIRM_FIFO_FULL, UINT64, AVERAGE),
+      COUNTABLE(ME_VS_DEALLOC_FIFO_FULL, UINT64, AVERAGE),
+      COUNTABLE(ME_PS_DEALLOC_FIFO_FULL, UINT64, AVERAGE),
+      COUNTABLE(ME_REGS_VS_EVENT_FIFO_FULL, UINT64, AVERAGE),
+      COUNTABLE(ME_REGS_PS_EVENT_FIFO_FULL, UINT64, AVERAGE),
+      COUNTABLE(ME_REGS_CF_EVENT_FIFO_FULL, UINT64, AVERAGE),
+      COUNTABLE(ME_MICRO_RB_STARVED, UINT64, AVERAGE),
+      COUNTABLE(ME_MICRO_I1_STARVED, UINT64, AVERAGE),
+      COUNTABLE(ME_MICRO_I2_STARVED, UINT64, AVERAGE),
+      COUNTABLE(ME_MICRO_ST_STARVED, UINT64, AVERAGE),
+      COUNTABLE(RCIU_RBBM_DWORD_SENT, UINT64, AVERAGE),
+      COUNTABLE(ME_BUSY_CLOCKS, UINT64, AVERAGE),
+      COUNTABLE(ME_WAIT_CONTEXT_AVAIL, UINT64, AVERAGE),
+      COUNTABLE(PFP_TYPE0_PACKET, UINT64, AVERAGE),
+      COUNTABLE(PFP_TYPE3_PACKET, UINT64, AVERAGE),
+      COUNTABLE(CSF_RB_WPTR_NEQ_RPTR, UINT64, AVERAGE),
+      COUNTABLE(CSF_I1_SIZE_NEQ_ZERO, UINT64, AVERAGE),
+      COUNTABLE(CSF_I2_SIZE_NEQ_ZERO, UINT64, AVERAGE),
+      COUNTABLE(CSF_RBI1I2_FETCHING, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter sx_counters[] = {
-       COUNTER(SX_PERFCOUNTER0_SELECT, SX_PERFCOUNTER0_LOW, SX_PERFCOUNTER0_HI),
+      COUNTER(SX_PERFCOUNTER0_SELECT, SX_PERFCOUNTER0_LOW, SX_PERFCOUNTER0_HI),
 };
 
 static const struct fd_perfcntr_counter mh_counters[] = {
-       COUNTER(MH_PERFCOUNTER0_SELECT, MH_PERFCOUNTER0_LOW, MH_PERFCOUNTER0_HI),
-       COUNTER(MH_PERFCOUNTER1_SELECT, MH_PERFCOUNTER1_LOW, MH_PERFCOUNTER1_HI),
+      COUNTER(MH_PERFCOUNTER0_SELECT, MH_PERFCOUNTER0_LOW, MH_PERFCOUNTER0_HI),
+      COUNTER(MH_PERFCOUNTER1_SELECT, MH_PERFCOUNTER1_LOW, MH_PERFCOUNTER1_HI),
 };
 
 static const struct fd_perfcntr_counter rbbm_counters[] = {
-       COUNTER(RBBM_PERFCOUNTER0_SELECT, RBBM_PERFCOUNTER0_LO, RBBM_PERFCOUNTER0_HI),
-       COUNTER(RBBM_PERFCOUNTER1_SELECT, RBBM_PERFCOUNTER1_LO, RBBM_PERFCOUNTER1_HI),
+      COUNTER(RBBM_PERFCOUNTER0_SELECT, RBBM_PERFCOUNTER0_LO, RBBM_PERFCOUNTER0_HI),
+      COUNTER(RBBM_PERFCOUNTER1_SELECT, RBBM_PERFCOUNTER1_LO, RBBM_PERFCOUNTER1_HI),
 };
 
 static const struct fd_perfcntr_counter cp_counters[] = {
-       COUNTER(CP_PERFCOUNTER_SELECT, CP_PERFCOUNTER_LO, CP_PERFCOUNTER_HI),
+      COUNTER(CP_PERFCOUNTER_SELECT, CP_PERFCOUNTER_LO, CP_PERFCOUNTER_HI),
 };
 
 static const struct fd_perfcntr_counter rb_counters[] = {
-       COUNTER(RB_PERFCOUNTER0_SELECT, RB_PERFCOUNTER0_LOW, RB_PERFCOUNTER0_HI),
-       COUNTER(RB_PERFCOUNTER1_SELECT, RB_PERFCOUNTER1_LOW, RB_PERFCOUNTER1_HI),
-       COUNTER(RB_PERFCOUNTER2_SELECT, RB_PERFCOUNTER2_LOW, RB_PERFCOUNTER2_HI),
-       COUNTER(RB_PERFCOUNTER3_SELECT, RB_PERFCOUNTER3_LOW, RB_PERFCOUNTER3_HI),
+      COUNTER(RB_PERFCOUNTER0_SELECT, RB_PERFCOUNTER0_LOW, RB_PERFCOUNTER0_HI),
+      COUNTER(RB_PERFCOUNTER1_SELECT, RB_PERFCOUNTER1_LOW, RB_PERFCOUNTER1_HI),
+      COUNTER(RB_PERFCOUNTER2_SELECT, RB_PERFCOUNTER2_LOW, RB_PERFCOUNTER2_HI),
+      COUNTER(RB_PERFCOUNTER3_SELECT, RB_PERFCOUNTER3_LOW, RB_PERFCOUNTER3_HI),
 };
 
 const struct fd_perfcntr_group a2xx_perfcntr_groups[] = {
-       GROUP("CP", cp_counters, cp_countables),
-       GROUP("PA_SU", pa_su_counters, pa_su_countables),
-       GROUP("PA_SC", pa_sc_counters, pa_sc_countables),
-       GROUP("VGT", vgt_counters, vgt_countables),
-       GROUP("TCR", tcr_counters, tcr_countables),
-       GROUP("TP0", tp0_counters, tp0_countables),
-       GROUP("TCM", tcm_counters, tcm_countables),
-       GROUP("TCF", tcf_counters, tcf_countables),
-       GROUP("SQ", sq_counters, sq_countables),
-       GROUP("SX", sx_counters, sx_countables),
-       GROUP("MH", mh_counters, mh_countables),
-       GROUP("RBBM", rbbm_counters, rbbm_countables),
-       GROUP("RB", rb_counters, rb_countables),
+      GROUP("CP", cp_counters, cp_countables),
+      GROUP("PA_SU", pa_su_counters, pa_su_countables),
+      GROUP("PA_SC", pa_sc_counters, pa_sc_countables),
+      GROUP("VGT", vgt_counters, vgt_countables),
+      GROUP("TCR", tcr_counters, tcr_countables),
+      GROUP("TP0", tp0_counters, tp0_countables),
+      GROUP("TCM", tcm_counters, tcm_countables),
+      GROUP("TCF", tcf_counters, tcf_countables),
+      GROUP("SQ", sq_counters, sq_countables),
+      GROUP("SX", sx_counters, sx_countables),
+      GROUP("MH", mh_counters, mh_countables),
+      GROUP("RBBM", rbbm_counters, rbbm_countables),
+      GROUP("RB", rb_counters, rb_countables),
 };
 
 const unsigned a2xx_num_perfcntr_groups = ARRAY_SIZE(a2xx_perfcntr_groups);
index 0d8d2ae..3204028 100644 (file)
 
 static const struct fd_perfcntr_counter cp_counters[] = {
 //RESERVED: for kernel
-//     COUNTER(CP_PERFCTR_CP_SEL_0, RBBM_PERFCTR_CP_0_LO, RBBM_PERFCTR_CP_0_HI),
-       COUNTER(CP_PERFCTR_CP_SEL_1, RBBM_PERFCTR_CP_1_LO, RBBM_PERFCTR_CP_1_HI),
-       COUNTER(CP_PERFCTR_CP_SEL_2, RBBM_PERFCTR_CP_2_LO, RBBM_PERFCTR_CP_2_HI),
-       COUNTER(CP_PERFCTR_CP_SEL_3, RBBM_PERFCTR_CP_3_LO, RBBM_PERFCTR_CP_3_HI),
-       COUNTER(CP_PERFCTR_CP_SEL_4, RBBM_PERFCTR_CP_4_LO, RBBM_PERFCTR_CP_4_HI),
-       COUNTER(CP_PERFCTR_CP_SEL_5, RBBM_PERFCTR_CP_5_LO, RBBM_PERFCTR_CP_5_HI),
-       COUNTER(CP_PERFCTR_CP_SEL_6, RBBM_PERFCTR_CP_6_LO, RBBM_PERFCTR_CP_6_HI),
-       COUNTER(CP_PERFCTR_CP_SEL_7, RBBM_PERFCTR_CP_7_LO, RBBM_PERFCTR_CP_7_HI),
+//    COUNTER(CP_PERFCTR_CP_SEL_0, RBBM_PERFCTR_CP_0_LO, RBBM_PERFCTR_CP_0_HI),
+      COUNTER(CP_PERFCTR_CP_SEL_1, RBBM_PERFCTR_CP_1_LO, RBBM_PERFCTR_CP_1_HI),
+      COUNTER(CP_PERFCTR_CP_SEL_2, RBBM_PERFCTR_CP_2_LO, RBBM_PERFCTR_CP_2_HI),
+      COUNTER(CP_PERFCTR_CP_SEL_3, RBBM_PERFCTR_CP_3_LO, RBBM_PERFCTR_CP_3_HI),
+      COUNTER(CP_PERFCTR_CP_SEL_4, RBBM_PERFCTR_CP_4_LO, RBBM_PERFCTR_CP_4_HI),
+      COUNTER(CP_PERFCTR_CP_SEL_5, RBBM_PERFCTR_CP_5_LO, RBBM_PERFCTR_CP_5_HI),
+      COUNTER(CP_PERFCTR_CP_SEL_6, RBBM_PERFCTR_CP_6_LO, RBBM_PERFCTR_CP_6_HI),
+      COUNTER(CP_PERFCTR_CP_SEL_7, RBBM_PERFCTR_CP_7_LO, RBBM_PERFCTR_CP_7_HI),
 };
 
 static const struct fd_perfcntr_countable cp_countables[] = {
-       COUNTABLE(PERF_CP_ALWAYS_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_BUSY_GFX_CORE_IDLE, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_PFP_IDLE, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_PFP_BUSY_WORKING, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_PFP_STALL_CYCLES_ANY, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_PFP_STARVE_CYCLES_ANY, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_PFP_ICACHE_MISS, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_PFP_ICACHE_HIT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_PFP_MATCH_PM4_PKT_PROFILE, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_ME_BUSY_WORKING, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_ME_IDLE, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_ME_STARVE_CYCLES_ANY, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_ME_FIFO_EMPTY_PFP_IDLE, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_ME_FIFO_EMPTY_PFP_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_ME_FIFO_FULL_ME_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_ME_FIFO_FULL_ME_NON_WORKING, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_ME_STALL_CYCLES_ANY, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_ME_ICACHE_MISS, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_ME_ICACHE_HIT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_NUM_PREEMPTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_PREEMPTION_REACTION_DELAY, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_PREEMPTION_SWITCH_OUT_TIME, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_PREEMPTION_SWITCH_IN_TIME, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_DEAD_DRAWS_IN_BIN_RENDER, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_PREDICATED_DRAWS_KILLED, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_MODE_SWITCH, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_ZPASS_DONE, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_CONTEXT_DONE, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_CACHE_FLUSH, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_LONG_PREEMPTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_ALWAYS_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_BUSY_GFX_CORE_IDLE, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_PFP_IDLE, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_PFP_BUSY_WORKING, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_PFP_STALL_CYCLES_ANY, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_PFP_STARVE_CYCLES_ANY, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_PFP_ICACHE_MISS, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_PFP_ICACHE_HIT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_PFP_MATCH_PM4_PKT_PROFILE, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_ME_BUSY_WORKING, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_ME_IDLE, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_ME_STARVE_CYCLES_ANY, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_ME_FIFO_EMPTY_PFP_IDLE, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_ME_FIFO_EMPTY_PFP_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_ME_FIFO_FULL_ME_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_ME_FIFO_FULL_ME_NON_WORKING, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_ME_STALL_CYCLES_ANY, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_ME_ICACHE_MISS, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_ME_ICACHE_HIT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_NUM_PREEMPTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_PREEMPTION_REACTION_DELAY, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_PREEMPTION_SWITCH_OUT_TIME, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_PREEMPTION_SWITCH_IN_TIME, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_DEAD_DRAWS_IN_BIN_RENDER, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_PREDICATED_DRAWS_KILLED, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_MODE_SWITCH, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_ZPASS_DONE, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_CONTEXT_DONE, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_CACHE_FLUSH, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_LONG_PREEMPTIONS, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter ccu_counters[] = {
-       COUNTER(RB_PERFCTR_CCU_SEL_0, RBBM_PERFCTR_CCU_0_LO, RBBM_PERFCTR_CCU_0_HI),
-       COUNTER(RB_PERFCTR_CCU_SEL_1, RBBM_PERFCTR_CCU_1_LO, RBBM_PERFCTR_CCU_1_HI),
-       COUNTER(RB_PERFCTR_CCU_SEL_2, RBBM_PERFCTR_CCU_2_LO, RBBM_PERFCTR_CCU_2_HI),
-       COUNTER(RB_PERFCTR_CCU_SEL_3, RBBM_PERFCTR_CCU_3_LO, RBBM_PERFCTR_CCU_3_HI),
+      COUNTER(RB_PERFCTR_CCU_SEL_0, RBBM_PERFCTR_CCU_0_LO, RBBM_PERFCTR_CCU_0_HI),
+      COUNTER(RB_PERFCTR_CCU_SEL_1, RBBM_PERFCTR_CCU_1_LO, RBBM_PERFCTR_CCU_1_HI),
+      COUNTER(RB_PERFCTR_CCU_SEL_2, RBBM_PERFCTR_CCU_2_LO, RBBM_PERFCTR_CCU_2_HI),
+      COUNTER(RB_PERFCTR_CCU_SEL_3, RBBM_PERFCTR_CCU_3_LO, RBBM_PERFCTR_CCU_3_HI),
 };
 
 static const struct fd_perfcntr_countable ccu_countables[] = {
-       COUNTABLE(PERF_CCU_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_STALL_CYCLES_RB_DEPTH_RETURN, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_STALL_CYCLES_RB_COLOR_RETURN, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_STARVE_CYCLES_FLAG_RETURN, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_DEPTH_BLOCKS, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_COLOR_BLOCKS, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_DEPTH_BLOCK_HIT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_COLOR_BLOCK_HIT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_PARTIAL_BLOCK_READ, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_GMEM_READ, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_GMEM_WRITE, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_DEPTH_READ_FLAG0_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_DEPTH_READ_FLAG1_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_DEPTH_READ_FLAG2_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_DEPTH_READ_FLAG3_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_DEPTH_READ_FLAG4_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_COLOR_READ_FLAG0_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_COLOR_READ_FLAG1_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_COLOR_READ_FLAG2_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_COLOR_READ_FLAG3_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_COLOR_READ_FLAG4_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_2D_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_2D_RD_REQ, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_2D_WR_REQ, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_2D_REORDER_STARVE_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_2D_PIXELS, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_STALL_CYCLES_RB_DEPTH_RETURN, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_STALL_CYCLES_RB_COLOR_RETURN, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_STARVE_CYCLES_FLAG_RETURN, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_DEPTH_BLOCKS, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_COLOR_BLOCKS, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_DEPTH_BLOCK_HIT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_COLOR_BLOCK_HIT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_PARTIAL_BLOCK_READ, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_GMEM_READ, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_GMEM_WRITE, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_DEPTH_READ_FLAG0_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_DEPTH_READ_FLAG1_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_DEPTH_READ_FLAG2_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_DEPTH_READ_FLAG3_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_DEPTH_READ_FLAG4_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_COLOR_READ_FLAG0_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_COLOR_READ_FLAG1_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_COLOR_READ_FLAG2_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_COLOR_READ_FLAG3_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_COLOR_READ_FLAG4_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_2D_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_2D_RD_REQ, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_2D_WR_REQ, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_2D_REORDER_STARVE_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_2D_PIXELS, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter tse_counters[] = {
-       COUNTER(GRAS_PERFCTR_TSE_SEL_0, RBBM_PERFCTR_TSE_0_LO, RBBM_PERFCTR_TSE_0_HI),
-       COUNTER(GRAS_PERFCTR_TSE_SEL_1, RBBM_PERFCTR_TSE_1_LO, RBBM_PERFCTR_TSE_1_HI),
-       COUNTER(GRAS_PERFCTR_TSE_SEL_2, RBBM_PERFCTR_TSE_2_LO, RBBM_PERFCTR_TSE_2_HI),
-       COUNTER(GRAS_PERFCTR_TSE_SEL_3, RBBM_PERFCTR_TSE_3_LO, RBBM_PERFCTR_TSE_3_HI),
+      COUNTER(GRAS_PERFCTR_TSE_SEL_0, RBBM_PERFCTR_TSE_0_LO, RBBM_PERFCTR_TSE_0_HI),
+      COUNTER(GRAS_PERFCTR_TSE_SEL_1, RBBM_PERFCTR_TSE_1_LO, RBBM_PERFCTR_TSE_1_HI),
+      COUNTER(GRAS_PERFCTR_TSE_SEL_2, RBBM_PERFCTR_TSE_2_LO, RBBM_PERFCTR_TSE_2_HI),
+      COUNTER(GRAS_PERFCTR_TSE_SEL_3, RBBM_PERFCTR_TSE_3_LO, RBBM_PERFCTR_TSE_3_HI),
 };
 
 static const struct fd_perfcntr_countable tse_countables[] = {
-       COUNTABLE(PERF_TSE_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_CLIPPING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_STALL_CYCLES_RAS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_STALL_CYCLES_LRZ_BARYPLANE, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_STALL_CYCLES_LRZ_ZPLANE, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_STARVE_CYCLES_PC, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_INPUT_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_INPUT_NULL_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_TRIVAL_REJ_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_CLIPPED_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_ZERO_AREA_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_FACENESS_CULLED_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_ZERO_PIXEL_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_OUTPUT_NULL_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_OUTPUT_VISIBLE_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_CINVOCATION, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_CPRIMITIVES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_2D_INPUT_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_2D_ALIVE_CLCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_CLIPPING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_STALL_CYCLES_RAS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_STALL_CYCLES_LRZ_BARYPLANE, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_STALL_CYCLES_LRZ_ZPLANE, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_STARVE_CYCLES_PC, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_INPUT_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_INPUT_NULL_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_TRIVAL_REJ_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_CLIPPED_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_ZERO_AREA_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_FACENESS_CULLED_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_ZERO_PIXEL_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_OUTPUT_NULL_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_OUTPUT_VISIBLE_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_CINVOCATION, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_CPRIMITIVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_2D_INPUT_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_2D_ALIVE_CLCLES, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter ras_counters[] = {
-       COUNTER(GRAS_PERFCTR_RAS_SEL_0, RBBM_PERFCTR_RAS_0_LO, RBBM_PERFCTR_RAS_0_HI),
-       COUNTER(GRAS_PERFCTR_RAS_SEL_1, RBBM_PERFCTR_RAS_1_LO, RBBM_PERFCTR_RAS_1_HI),
-       COUNTER(GRAS_PERFCTR_RAS_SEL_2, RBBM_PERFCTR_RAS_2_LO, RBBM_PERFCTR_RAS_2_HI),
-       COUNTER(GRAS_PERFCTR_RAS_SEL_3, RBBM_PERFCTR_RAS_3_LO, RBBM_PERFCTR_RAS_3_HI),
+      COUNTER(GRAS_PERFCTR_RAS_SEL_0, RBBM_PERFCTR_RAS_0_LO, RBBM_PERFCTR_RAS_0_HI),
+      COUNTER(GRAS_PERFCTR_RAS_SEL_1, RBBM_PERFCTR_RAS_1_LO, RBBM_PERFCTR_RAS_1_HI),
+      COUNTER(GRAS_PERFCTR_RAS_SEL_2, RBBM_PERFCTR_RAS_2_LO, RBBM_PERFCTR_RAS_2_HI),
+      COUNTER(GRAS_PERFCTR_RAS_SEL_3, RBBM_PERFCTR_RAS_3_LO, RBBM_PERFCTR_RAS_3_HI),
 };
 
 static const struct fd_perfcntr_countable ras_countables[] = {
-       COUNTABLE(PERF_RAS_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RAS_SUPERTILE_ACTIVE_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RAS_STALL_CYCLES_LRZ, UINT64, AVERAGE),
-       COUNTABLE(PERF_RAS_STARVE_CYCLES_TSE, UINT64, AVERAGE),
-       COUNTABLE(PERF_RAS_SUPER_TILES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RAS_8X4_TILES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RAS_MASKGEN_ACTIVE, UINT64, AVERAGE),
-       COUNTABLE(PERF_RAS_FULLY_COVERED_SUPER_TILES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RAS_FULLY_COVERED_8X4_TILES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RAS_PRIM_KILLED_INVISILBE, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_SUPERTILE_ACTIVE_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_STALL_CYCLES_LRZ, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_STARVE_CYCLES_TSE, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_SUPER_TILES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_8X4_TILES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_MASKGEN_ACTIVE, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_FULLY_COVERED_SUPER_TILES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_FULLY_COVERED_8X4_TILES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_PRIM_KILLED_INVISILBE, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter lrz_counters[] = {
-       COUNTER(GRAS_PERFCTR_LRZ_SEL_0, RBBM_PERFCTR_LRZ_0_LO, RBBM_PERFCTR_LRZ_0_HI),
-       COUNTER(GRAS_PERFCTR_LRZ_SEL_1, RBBM_PERFCTR_LRZ_1_LO, RBBM_PERFCTR_LRZ_1_HI),
-       COUNTER(GRAS_PERFCTR_LRZ_SEL_2, RBBM_PERFCTR_LRZ_2_LO, RBBM_PERFCTR_LRZ_2_HI),
-       COUNTER(GRAS_PERFCTR_LRZ_SEL_3, RBBM_PERFCTR_LRZ_3_LO, RBBM_PERFCTR_LRZ_3_HI),
+      COUNTER(GRAS_PERFCTR_LRZ_SEL_0, RBBM_PERFCTR_LRZ_0_LO, RBBM_PERFCTR_LRZ_0_HI),
+      COUNTER(GRAS_PERFCTR_LRZ_SEL_1, RBBM_PERFCTR_LRZ_1_LO, RBBM_PERFCTR_LRZ_1_HI),
+      COUNTER(GRAS_PERFCTR_LRZ_SEL_2, RBBM_PERFCTR_LRZ_2_LO, RBBM_PERFCTR_LRZ_2_HI),
+      COUNTER(GRAS_PERFCTR_LRZ_SEL_3, RBBM_PERFCTR_LRZ_3_LO, RBBM_PERFCTR_LRZ_3_HI),
 };
 
 static const struct fd_perfcntr_countable lrz_countables[] = {
-       COUNTABLE(PERF_LRZ_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_STARVE_CYCLES_RAS, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_STALL_CYCLES_RB, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_STALL_CYCLES_VSC, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_STALL_CYCLES_VPC, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_STALL_CYCLES_FLAG_PREFETCH, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_STALL_CYCLES_UCHE, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_LRZ_READ, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_LRZ_WRITE, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_READ_LATENCY, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_MERGE_CACHE_UPDATING, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_PRIM_KILLED_BY_MASKGEN, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_PRIM_KILLED_BY_LRZ, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_VISIBLE_PRIM_AFTER_LRZ, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_FULL_8X8_TILES, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_PARTIAL_8X8_TILES, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_TILE_KILLED, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_TOTAL_PIXEL, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_VISIBLE_PIXEL_AFTER_LRZ, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_STARVE_CYCLES_RAS, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_STALL_CYCLES_RB, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_STALL_CYCLES_VSC, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_STALL_CYCLES_VPC, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_STALL_CYCLES_FLAG_PREFETCH, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_STALL_CYCLES_UCHE, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_LRZ_READ, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_LRZ_WRITE, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_READ_LATENCY, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_MERGE_CACHE_UPDATING, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_PRIM_KILLED_BY_MASKGEN, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_PRIM_KILLED_BY_LRZ, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_VISIBLE_PRIM_AFTER_LRZ, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_FULL_8X8_TILES, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_PARTIAL_8X8_TILES, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_TILE_KILLED, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_TOTAL_PIXEL, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_VISIBLE_PIXEL_AFTER_LRZ, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter hlsq_counters[] = {
-       COUNTER(HLSQ_PERFCTR_HLSQ_SEL_0, RBBM_PERFCTR_HLSQ_0_LO, RBBM_PERFCTR_HLSQ_0_HI),
-       COUNTER(HLSQ_PERFCTR_HLSQ_SEL_1, RBBM_PERFCTR_HLSQ_1_LO, RBBM_PERFCTR_HLSQ_1_HI),
-       COUNTER(HLSQ_PERFCTR_HLSQ_SEL_2, RBBM_PERFCTR_HLSQ_2_LO, RBBM_PERFCTR_HLSQ_2_HI),
-       COUNTER(HLSQ_PERFCTR_HLSQ_SEL_3, RBBM_PERFCTR_HLSQ_3_LO, RBBM_PERFCTR_HLSQ_3_HI),
-       COUNTER(HLSQ_PERFCTR_HLSQ_SEL_4, RBBM_PERFCTR_HLSQ_4_LO, RBBM_PERFCTR_HLSQ_4_HI),
-       COUNTER(HLSQ_PERFCTR_HLSQ_SEL_5, RBBM_PERFCTR_HLSQ_5_LO, RBBM_PERFCTR_HLSQ_5_HI),
-       COUNTER(HLSQ_PERFCTR_HLSQ_SEL_6, RBBM_PERFCTR_HLSQ_6_LO, RBBM_PERFCTR_HLSQ_6_HI),
-       COUNTER(HLSQ_PERFCTR_HLSQ_SEL_7, RBBM_PERFCTR_HLSQ_7_LO, RBBM_PERFCTR_HLSQ_7_HI),
+      COUNTER(HLSQ_PERFCTR_HLSQ_SEL_0, RBBM_PERFCTR_HLSQ_0_LO, RBBM_PERFCTR_HLSQ_0_HI),
+      COUNTER(HLSQ_PERFCTR_HLSQ_SEL_1, RBBM_PERFCTR_HLSQ_1_LO, RBBM_PERFCTR_HLSQ_1_HI),
+      COUNTER(HLSQ_PERFCTR_HLSQ_SEL_2, RBBM_PERFCTR_HLSQ_2_LO, RBBM_PERFCTR_HLSQ_2_HI),
+      COUNTER(HLSQ_PERFCTR_HLSQ_SEL_3, RBBM_PERFCTR_HLSQ_3_LO, RBBM_PERFCTR_HLSQ_3_HI),
+      COUNTER(HLSQ_PERFCTR_HLSQ_SEL_4, RBBM_PERFCTR_HLSQ_4_LO, RBBM_PERFCTR_HLSQ_4_HI),
+      COUNTER(HLSQ_PERFCTR_HLSQ_SEL_5, RBBM_PERFCTR_HLSQ_5_LO, RBBM_PERFCTR_HLSQ_5_HI),
+      COUNTER(HLSQ_PERFCTR_HLSQ_SEL_6, RBBM_PERFCTR_HLSQ_6_LO, RBBM_PERFCTR_HLSQ_6_HI),
+      COUNTER(HLSQ_PERFCTR_HLSQ_SEL_7, RBBM_PERFCTR_HLSQ_7_LO, RBBM_PERFCTR_HLSQ_7_HI),
 };
 
 static const struct fd_perfcntr_countable hlsq_countables[] = {
-       COUNTABLE(PERF_HLSQ_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_STALL_CYCLES_UCHE, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_STALL_CYCLES_SP_STATE, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_STALL_CYCLES_SP_FS_STAGE, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_UCHE_LATENCY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_UCHE_LATENCY_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_FS_STAGE_32_WAVES, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_FS_STAGE_64_WAVES, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_QUADS, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_SP_STATE_COPY_TRANS_FS_STAGE, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_SP_STATE_COPY_TRANS_VS_STAGE, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_TP_STATE_COPY_TRANS_FS_STAGE, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_TP_STATE_COPY_TRANS_VS_STAGE, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_CS_INVOCATIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_COMPUTE_DRAWCALLS, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_STALL_CYCLES_UCHE, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_STALL_CYCLES_SP_STATE, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_STALL_CYCLES_SP_FS_STAGE, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_UCHE_LATENCY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_UCHE_LATENCY_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_FS_STAGE_32_WAVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_FS_STAGE_64_WAVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_QUADS, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_SP_STATE_COPY_TRANS_FS_STAGE, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_SP_STATE_COPY_TRANS_VS_STAGE, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_TP_STATE_COPY_TRANS_FS_STAGE, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_TP_STATE_COPY_TRANS_VS_STAGE, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_CS_INVOCATIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_COMPUTE_DRAWCALLS, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter pc_counters[] = {
-       COUNTER(PC_PERFCTR_PC_SEL_0, RBBM_PERFCTR_PC_0_LO, RBBM_PERFCTR_PC_0_HI),
-       COUNTER(PC_PERFCTR_PC_SEL_1, RBBM_PERFCTR_PC_1_LO, RBBM_PERFCTR_PC_1_HI),
-       COUNTER(PC_PERFCTR_PC_SEL_2, RBBM_PERFCTR_PC_2_LO, RBBM_PERFCTR_PC_2_HI),
-       COUNTER(PC_PERFCTR_PC_SEL_3, RBBM_PERFCTR_PC_3_LO, RBBM_PERFCTR_PC_3_HI),
-       COUNTER(PC_PERFCTR_PC_SEL_4, RBBM_PERFCTR_PC_4_LO, RBBM_PERFCTR_PC_4_HI),
-       COUNTER(PC_PERFCTR_PC_SEL_5, RBBM_PERFCTR_PC_5_LO, RBBM_PERFCTR_PC_5_HI),
-       COUNTER(PC_PERFCTR_PC_SEL_6, RBBM_PERFCTR_PC_6_LO, RBBM_PERFCTR_PC_6_HI),
-       COUNTER(PC_PERFCTR_PC_SEL_7, RBBM_PERFCTR_PC_7_LO, RBBM_PERFCTR_PC_7_HI),
+      COUNTER(PC_PERFCTR_PC_SEL_0, RBBM_PERFCTR_PC_0_LO, RBBM_PERFCTR_PC_0_HI),
+      COUNTER(PC_PERFCTR_PC_SEL_1, RBBM_PERFCTR_PC_1_LO, RBBM_PERFCTR_PC_1_HI),
+      COUNTER(PC_PERFCTR_PC_SEL_2, RBBM_PERFCTR_PC_2_LO, RBBM_PERFCTR_PC_2_HI),
+      COUNTER(PC_PERFCTR_PC_SEL_3, RBBM_PERFCTR_PC_3_LO, RBBM_PERFCTR_PC_3_HI),
+      COUNTER(PC_PERFCTR_PC_SEL_4, RBBM_PERFCTR_PC_4_LO, RBBM_PERFCTR_PC_4_HI),
+      COUNTER(PC_PERFCTR_PC_SEL_5, RBBM_PERFCTR_PC_5_LO, RBBM_PERFCTR_PC_5_HI),
+      COUNTER(PC_PERFCTR_PC_SEL_6, RBBM_PERFCTR_PC_6_LO, RBBM_PERFCTR_PC_6_HI),
+      COUNTER(PC_PERFCTR_PC_SEL_7, RBBM_PERFCTR_PC_7_LO, RBBM_PERFCTR_PC_7_HI),
 };
 
 static const struct fd_perfcntr_countable pc_countables[] = {
-       COUNTABLE(PERF_PC_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STALL_CYCLES_VFD, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STALL_CYCLES_TSE, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STALL_CYCLES_VPC, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STALL_CYCLES_UCHE, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STALL_CYCLES_TESS, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STALL_CYCLES_TSE_ONLY, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STALL_CYCLES_VPC_ONLY, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_PASS1_TF_STALL_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_INDEX, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_TESS_FACTOR, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_VIZ_STREAM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_POSITION, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STARVE_CYCLES_DI, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_VIS_STREAMS_LOADED, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_INSTANCES, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_VPC_PRIMITIVES, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_DEAD_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_LIVE_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_VERTEX_HITS, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_IA_VERTICES, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_IA_PRIMITIVES, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_GS_PRIMITIVES, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_HS_INVOCATIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_DS_INVOCATIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_VS_INVOCATIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_GS_INVOCATIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_DS_PRIMITIVES, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_VPC_POS_DATA_TRANSACTION, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_3D_DRAWCALLS, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_2D_DRAWCALLS, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_NON_DRAWCALL_GLOBAL_EVENTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TESS_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TESS_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TESS_STALL_CYCLES_PC, UINT64, AVERAGE),
-       COUNTABLE(PERF_TESS_STARVE_CYCLES_PC, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STALL_CYCLES_VFD, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STALL_CYCLES_TSE, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STALL_CYCLES_VPC, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STALL_CYCLES_UCHE, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STALL_CYCLES_TESS, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STALL_CYCLES_TSE_ONLY, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STALL_CYCLES_VPC_ONLY, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_PASS1_TF_STALL_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_INDEX, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_TESS_FACTOR, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_VIZ_STREAM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_POSITION, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STARVE_CYCLES_DI, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_VIS_STREAMS_LOADED, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_INSTANCES, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_VPC_PRIMITIVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_DEAD_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_LIVE_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_VERTEX_HITS, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_IA_VERTICES, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_IA_PRIMITIVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_GS_PRIMITIVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_HS_INVOCATIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_DS_INVOCATIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_VS_INVOCATIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_GS_INVOCATIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_DS_PRIMITIVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_VPC_POS_DATA_TRANSACTION, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_3D_DRAWCALLS, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_2D_DRAWCALLS, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_NON_DRAWCALL_GLOBAL_EVENTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TESS_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TESS_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TESS_STALL_CYCLES_PC, UINT64, AVERAGE),
+      COUNTABLE(PERF_TESS_STARVE_CYCLES_PC, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter rb_counters[] = {
-       COUNTER(RB_PERFCTR_RB_SEL_0, RBBM_PERFCTR_RB_0_LO, RBBM_PERFCTR_RB_0_HI),
-       COUNTER(RB_PERFCTR_RB_SEL_1, RBBM_PERFCTR_RB_1_LO, RBBM_PERFCTR_RB_1_HI),
-       COUNTER(RB_PERFCTR_RB_SEL_2, RBBM_PERFCTR_RB_2_LO, RBBM_PERFCTR_RB_2_HI),
-       COUNTER(RB_PERFCTR_RB_SEL_3, RBBM_PERFCTR_RB_3_LO, RBBM_PERFCTR_RB_3_HI),
-       COUNTER(RB_PERFCTR_RB_SEL_4, RBBM_PERFCTR_RB_4_LO, RBBM_PERFCTR_RB_4_HI),
-       COUNTER(RB_PERFCTR_RB_SEL_5, RBBM_PERFCTR_RB_5_LO, RBBM_PERFCTR_RB_5_HI),
-       COUNTER(RB_PERFCTR_RB_SEL_6, RBBM_PERFCTR_RB_6_LO, RBBM_PERFCTR_RB_6_HI),
-       COUNTER(RB_PERFCTR_RB_SEL_7, RBBM_PERFCTR_RB_7_LO, RBBM_PERFCTR_RB_7_HI),
+      COUNTER(RB_PERFCTR_RB_SEL_0, RBBM_PERFCTR_RB_0_LO, RBBM_PERFCTR_RB_0_HI),
+      COUNTER(RB_PERFCTR_RB_SEL_1, RBBM_PERFCTR_RB_1_LO, RBBM_PERFCTR_RB_1_HI),
+      COUNTER(RB_PERFCTR_RB_SEL_2, RBBM_PERFCTR_RB_2_LO, RBBM_PERFCTR_RB_2_HI),
+      COUNTER(RB_PERFCTR_RB_SEL_3, RBBM_PERFCTR_RB_3_LO, RBBM_PERFCTR_RB_3_HI),
+      COUNTER(RB_PERFCTR_RB_SEL_4, RBBM_PERFCTR_RB_4_LO, RBBM_PERFCTR_RB_4_HI),
+      COUNTER(RB_PERFCTR_RB_SEL_5, RBBM_PERFCTR_RB_5_LO, RBBM_PERFCTR_RB_5_HI),
+      COUNTER(RB_PERFCTR_RB_SEL_6, RBBM_PERFCTR_RB_6_LO, RBBM_PERFCTR_RB_6_HI),
+      COUNTER(RB_PERFCTR_RB_SEL_7, RBBM_PERFCTR_RB_7_LO, RBBM_PERFCTR_RB_7_HI),
 };
 
 static const struct fd_perfcntr_countable rb_countables[] = {
-       COUNTABLE(PERF_RB_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STALL_CYCLES_CCU, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STALL_CYCLES_HLSQ, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STALL_CYCLES_FIFO0_FULL, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STALL_CYCLES_FIFO1_FULL, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STALL_CYCLES_FIFO2_FULL, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STARVE_CYCLES_SP, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STARVE_CYCLES_LRZ_TILE, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STARVE_CYCLES_CCU, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STARVE_CYCLES_Z_PLANE, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STARVE_CYCLES_BARY_PLANE, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_Z_WORKLOAD, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_HLSQ_ACTIVE, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_Z_READ, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_Z_WRITE, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_C_READ, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_C_WRITE, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_TOTAL_PASS, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_Z_PASS, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_Z_FAIL, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_S_FAIL, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_BLENDED_FXP_COMPONENTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_BLENDED_FP16_COMPONENTS, UINT64, AVERAGE),
-       COUNTABLE(RB_RESERVED, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_2D_ALIVE_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_2D_STALL_CYCLES_A2D, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_2D_STARVE_CYCLES_SRC, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_2D_STARVE_CYCLES_SP, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_2D_STARVE_CYCLES_DST, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_2D_VALID_PIXELS, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STALL_CYCLES_CCU, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STALL_CYCLES_HLSQ, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STALL_CYCLES_FIFO0_FULL, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STALL_CYCLES_FIFO1_FULL, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STALL_CYCLES_FIFO2_FULL, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STARVE_CYCLES_SP, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STARVE_CYCLES_LRZ_TILE, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STARVE_CYCLES_CCU, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STARVE_CYCLES_Z_PLANE, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STARVE_CYCLES_BARY_PLANE, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_Z_WORKLOAD, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_HLSQ_ACTIVE, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_Z_READ, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_Z_WRITE, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_C_READ, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_C_WRITE, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_TOTAL_PASS, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_Z_PASS, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_Z_FAIL, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_S_FAIL, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_BLENDED_FXP_COMPONENTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_BLENDED_FP16_COMPONENTS, UINT64, AVERAGE),
+      COUNTABLE(RB_RESERVED, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_2D_ALIVE_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_2D_STALL_CYCLES_A2D, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_2D_STARVE_CYCLES_SRC, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_2D_STARVE_CYCLES_SP, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_2D_STARVE_CYCLES_DST, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_2D_VALID_PIXELS, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter rbbm_counters[] = {
-//RESERVED: for kernel
-//     COUNTER(RBBM_PERFCTR_RBBM_SEL_0, RBBM_PERFCTR_RBBM_0_LO, RBBM_PERFCTR_RBBM_0_HI),
-       COUNTER(RBBM_PERFCTR_RBBM_SEL_1, RBBM_PERFCTR_RBBM_1_LO, RBBM_PERFCTR_RBBM_1_HI),
-       COUNTER(RBBM_PERFCTR_RBBM_SEL_2, RBBM_PERFCTR_RBBM_2_LO, RBBM_PERFCTR_RBBM_2_HI),
-       COUNTER(RBBM_PERFCTR_RBBM_SEL_3, RBBM_PERFCTR_RBBM_3_LO, RBBM_PERFCTR_RBBM_3_HI),
+      //RESERVED: for kernel
+      //       COUNTER(RBBM_PERFCTR_RBBM_SEL_0, RBBM_PERFCTR_RBBM_0_LO, RBBM_PERFCTR_RBBM_0_HI),
+      COUNTER(RBBM_PERFCTR_RBBM_SEL_1, RBBM_PERFCTR_RBBM_1_LO, RBBM_PERFCTR_RBBM_1_HI),
+      COUNTER(RBBM_PERFCTR_RBBM_SEL_2, RBBM_PERFCTR_RBBM_2_LO, RBBM_PERFCTR_RBBM_2_HI),
+      COUNTER(RBBM_PERFCTR_RBBM_SEL_3, RBBM_PERFCTR_RBBM_3_LO, RBBM_PERFCTR_RBBM_3_HI),
 };
 
 static const struct fd_perfcntr_countable rbbm_countables[] = {
-       COUNTABLE(PERF_RBBM_ALWAYS_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_ALWAYS_ON, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_TSE_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_RAS_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_PC_DCALL_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_PC_VSD_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_STATUS_MASKED, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_COM_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_DCOM_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_VBIF_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_VSC_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_TESS_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_UCHE_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_HLSQ_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_ALWAYS_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_ALWAYS_ON, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_TSE_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_RAS_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_PC_DCALL_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_PC_VSD_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_STATUS_MASKED, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_COM_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_DCOM_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_VBIF_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_VSC_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_TESS_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_UCHE_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_HLSQ_BUSY, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter sp_counters[] = {
-//RESERVED: for kernel
-//     COUNTER(SP_PERFCTR_SP_SEL_0,  RBBM_PERFCTR_SP_0_LO,  RBBM_PERFCTR_SP_0_HI),
-       COUNTER(SP_PERFCTR_SP_SEL_1,  RBBM_PERFCTR_SP_1_LO,  RBBM_PERFCTR_SP_1_HI),
-       COUNTER(SP_PERFCTR_SP_SEL_2,  RBBM_PERFCTR_SP_2_LO,  RBBM_PERFCTR_SP_2_HI),
-       COUNTER(SP_PERFCTR_SP_SEL_3,  RBBM_PERFCTR_SP_3_LO,  RBBM_PERFCTR_SP_3_HI),
-       COUNTER(SP_PERFCTR_SP_SEL_4,  RBBM_PERFCTR_SP_4_LO,  RBBM_PERFCTR_SP_4_HI),
-       COUNTER(SP_PERFCTR_SP_SEL_5,  RBBM_PERFCTR_SP_5_LO,  RBBM_PERFCTR_SP_5_HI),
-       COUNTER(SP_PERFCTR_SP_SEL_6,  RBBM_PERFCTR_SP_6_LO,  RBBM_PERFCTR_SP_6_HI),
-       COUNTER(SP_PERFCTR_SP_SEL_7,  RBBM_PERFCTR_SP_7_LO,  RBBM_PERFCTR_SP_7_HI),
-       COUNTER(SP_PERFCTR_SP_SEL_8,  RBBM_PERFCTR_SP_8_LO,  RBBM_PERFCTR_SP_8_HI),
-       COUNTER(SP_PERFCTR_SP_SEL_9,  RBBM_PERFCTR_SP_9_LO,  RBBM_PERFCTR_SP_9_HI),
-       COUNTER(SP_PERFCTR_SP_SEL_10, RBBM_PERFCTR_SP_10_LO, RBBM_PERFCTR_SP_10_HI),
-       COUNTER(SP_PERFCTR_SP_SEL_11, RBBM_PERFCTR_SP_11_LO, RBBM_PERFCTR_SP_11_HI),
+      //RESERVED: for kernel
+      //       COUNTER(SP_PERFCTR_SP_SEL_0,  RBBM_PERFCTR_SP_0_LO,  RBBM_PERFCTR_SP_0_HI),
+      COUNTER(SP_PERFCTR_SP_SEL_1,  RBBM_PERFCTR_SP_1_LO,  RBBM_PERFCTR_SP_1_HI),
+      COUNTER(SP_PERFCTR_SP_SEL_2,  RBBM_PERFCTR_SP_2_LO,  RBBM_PERFCTR_SP_2_HI),
+      COUNTER(SP_PERFCTR_SP_SEL_3,  RBBM_PERFCTR_SP_3_LO,  RBBM_PERFCTR_SP_3_HI),
+      COUNTER(SP_PERFCTR_SP_SEL_4,  RBBM_PERFCTR_SP_4_LO,  RBBM_PERFCTR_SP_4_HI),
+      COUNTER(SP_PERFCTR_SP_SEL_5,  RBBM_PERFCTR_SP_5_LO,  RBBM_PERFCTR_SP_5_HI),
+      COUNTER(SP_PERFCTR_SP_SEL_6,  RBBM_PERFCTR_SP_6_LO,  RBBM_PERFCTR_SP_6_HI),
+      COUNTER(SP_PERFCTR_SP_SEL_7,  RBBM_PERFCTR_SP_7_LO,  RBBM_PERFCTR_SP_7_HI),
+      COUNTER(SP_PERFCTR_SP_SEL_8,  RBBM_PERFCTR_SP_8_LO,  RBBM_PERFCTR_SP_8_HI),
+      COUNTER(SP_PERFCTR_SP_SEL_9,  RBBM_PERFCTR_SP_9_LO,  RBBM_PERFCTR_SP_9_HI),
+      COUNTER(SP_PERFCTR_SP_SEL_10, RBBM_PERFCTR_SP_10_LO, RBBM_PERFCTR_SP_10_HI),
+      COUNTER(SP_PERFCTR_SP_SEL_11, RBBM_PERFCTR_SP_11_LO, RBBM_PERFCTR_SP_11_HI),
 };
 
 static const struct fd_perfcntr_countable sp_countables[] = {
-       COUNTABLE(PERF_SP_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_ALU_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_EFU_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_STALL_CYCLES_VPC, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_STALL_CYCLES_TP, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_STALL_CYCLES_UCHE, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_STALL_CYCLES_RB, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_SCHEDULER_NON_WORKING, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_CONTEXTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_CONTEXT_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FS_STAGE_WAVE_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FS_STAGE_WAVE_SAMPLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_VS_STAGE_WAVE_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_VS_STAGE_WAVE_SAMPLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FS_STAGE_DURATION_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_VS_STAGE_DURATION_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_CTRL_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_LOAD_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_EMIT_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_NOP_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_WAIT_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_FETCH_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_IDLE_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_END_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_LONG_SYNC_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_SHORT_SYNC_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_JOIN_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_LM_LOAD_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_LM_STORE_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_LM_ATOMICS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_GM_LOAD_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_GM_STORE_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_GM_ATOMICS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_VS_STAGE_TEX_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_VS_STAGE_CFLOW_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_VS_STAGE_EFU_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_VS_STAGE_HALF_ALU_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FS_STAGE_TEX_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FS_STAGE_CFLOW_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FS_STAGE_EFU_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FS_STAGE_BARY_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_VS_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FS_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_ADDR_LOCK_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_UCHE_READ_TRANS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_UCHE_WRITE_TRANS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_EXPORT_VPC_TRANS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_EXPORT_RB_TRANS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_PIXELS_KILLED, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_ICL1_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_ICL1_MISSES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_ICL0_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_ICL0_MISSES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_HS_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_DS_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_GS_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_CS_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_GPR_READ, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_GPR_WRITE, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_LM_CH0_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_LM_CH1_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_LM_BANK_CONFLICTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_ALU_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_EFU_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_STALL_CYCLES_VPC, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_STALL_CYCLES_TP, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_STALL_CYCLES_UCHE, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_STALL_CYCLES_RB, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_SCHEDULER_NON_WORKING, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_CONTEXTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_CONTEXT_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FS_STAGE_WAVE_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FS_STAGE_WAVE_SAMPLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_VS_STAGE_WAVE_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_VS_STAGE_WAVE_SAMPLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FS_STAGE_DURATION_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_VS_STAGE_DURATION_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_CTRL_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_LOAD_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_EMIT_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_NOP_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_WAIT_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_FETCH_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_IDLE_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_END_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_LONG_SYNC_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_SHORT_SYNC_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_JOIN_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_LM_LOAD_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_LM_STORE_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_LM_ATOMICS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_GM_LOAD_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_GM_STORE_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_GM_ATOMICS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_VS_STAGE_TEX_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_VS_STAGE_CFLOW_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_VS_STAGE_EFU_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_VS_STAGE_HALF_ALU_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FS_STAGE_TEX_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FS_STAGE_CFLOW_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FS_STAGE_EFU_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FS_STAGE_BARY_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_VS_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FS_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_ADDR_LOCK_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_UCHE_READ_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_UCHE_WRITE_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_EXPORT_VPC_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_EXPORT_RB_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_PIXELS_KILLED, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_ICL1_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_ICL1_MISSES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_ICL0_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_ICL0_MISSES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_HS_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_DS_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_GS_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_CS_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_GPR_READ, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_GPR_WRITE, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_LM_CH0_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_LM_CH1_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_LM_BANK_CONFLICTS, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter tp_counters[] = {
-       COUNTER(TPL1_PERFCTR_TP_SEL_0, RBBM_PERFCTR_TP_0_LO, RBBM_PERFCTR_TP_0_HI),
-       COUNTER(TPL1_PERFCTR_TP_SEL_1, RBBM_PERFCTR_TP_1_LO, RBBM_PERFCTR_TP_1_HI),
-       COUNTER(TPL1_PERFCTR_TP_SEL_2, RBBM_PERFCTR_TP_2_LO, RBBM_PERFCTR_TP_2_HI),
-       COUNTER(TPL1_PERFCTR_TP_SEL_3, RBBM_PERFCTR_TP_3_LO, RBBM_PERFCTR_TP_3_HI),
-       COUNTER(TPL1_PERFCTR_TP_SEL_4, RBBM_PERFCTR_TP_4_LO, RBBM_PERFCTR_TP_4_HI),
-       COUNTER(TPL1_PERFCTR_TP_SEL_5, RBBM_PERFCTR_TP_5_LO, RBBM_PERFCTR_TP_5_HI),
-       COUNTER(TPL1_PERFCTR_TP_SEL_6, RBBM_PERFCTR_TP_6_LO, RBBM_PERFCTR_TP_6_HI),
-       COUNTER(TPL1_PERFCTR_TP_SEL_7, RBBM_PERFCTR_TP_7_LO, RBBM_PERFCTR_TP_7_HI),
+      COUNTER(TPL1_PERFCTR_TP_SEL_0, RBBM_PERFCTR_TP_0_LO, RBBM_PERFCTR_TP_0_HI),
+      COUNTER(TPL1_PERFCTR_TP_SEL_1, RBBM_PERFCTR_TP_1_LO, RBBM_PERFCTR_TP_1_HI),
+      COUNTER(TPL1_PERFCTR_TP_SEL_2, RBBM_PERFCTR_TP_2_LO, RBBM_PERFCTR_TP_2_HI),
+      COUNTER(TPL1_PERFCTR_TP_SEL_3, RBBM_PERFCTR_TP_3_LO, RBBM_PERFCTR_TP_3_HI),
+      COUNTER(TPL1_PERFCTR_TP_SEL_4, RBBM_PERFCTR_TP_4_LO, RBBM_PERFCTR_TP_4_HI),
+      COUNTER(TPL1_PERFCTR_TP_SEL_5, RBBM_PERFCTR_TP_5_LO, RBBM_PERFCTR_TP_5_HI),
+      COUNTER(TPL1_PERFCTR_TP_SEL_6, RBBM_PERFCTR_TP_6_LO, RBBM_PERFCTR_TP_6_HI),
+      COUNTER(TPL1_PERFCTR_TP_SEL_7, RBBM_PERFCTR_TP_7_LO, RBBM_PERFCTR_TP_7_HI),
 };
 
 static const struct fd_perfcntr_countable tp_countables[] = {
-       COUNTABLE(PERF_TP_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_STALL_CYCLES_UCHE, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_LATENCY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_LATENCY_TRANS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_FLAG_CACHE_REQUEST_SAMPLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_FLAG_CACHE_REQUEST_LATENCY, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_L1_CACHELINE_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_L1_CACHELINE_MISSES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_SP_TP_TRANS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_TP_SP_TRANS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_OUTPUT_PIXELS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_FILTER_WORKLOAD_16BIT, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_FILTER_WORKLOAD_32BIT, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_QUADS_RECEIVED, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_QUADS_OFFSET, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_QUADS_SHADOW, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_QUADS_ARRAY, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_QUADS_GRADIENT, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_QUADS_1D, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_QUADS_2D, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_QUADS_BUFFER, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_QUADS_3D, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_QUADS_CUBE, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_STATE_CACHE_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_STATE_CACHE_MISSES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_DIVERGENT_QUADS_RECEIVED, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_BINDLESS_STATE_CACHE_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_BINDLESS_STATE_CACHE_MISSES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_PRT_NON_RESIDENT_EVENTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_OUTPUT_PIXELS_POINT, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_OUTPUT_PIXELS_BILINEAR, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_OUTPUT_PIXELS_MIP, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_OUTPUT_PIXELS_ANISO, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_OUTPUT_PIXELS_ZERO_LOD, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_FLAG_CACHE_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_FLAG_CACHE_MISSES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_L1_5_L2_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_2D_OUTPUT_PIXELS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_2D_OUTPUT_PIXELS_POINT, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_2D_OUTPUT_PIXELS_BILINEAR, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_2D_FILTER_WORKLOAD_16BIT, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_2D_FILTER_WORKLOAD_32BIT, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_STALL_CYCLES_UCHE, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_LATENCY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_LATENCY_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_FLAG_CACHE_REQUEST_SAMPLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_FLAG_CACHE_REQUEST_LATENCY, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_L1_CACHELINE_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_L1_CACHELINE_MISSES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_SP_TP_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_TP_SP_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_OUTPUT_PIXELS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_FILTER_WORKLOAD_16BIT, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_FILTER_WORKLOAD_32BIT, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_QUADS_RECEIVED, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_QUADS_OFFSET, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_QUADS_SHADOW, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_QUADS_ARRAY, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_QUADS_GRADIENT, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_QUADS_1D, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_QUADS_2D, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_QUADS_BUFFER, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_QUADS_3D, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_QUADS_CUBE, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_STATE_CACHE_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_STATE_CACHE_MISSES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_DIVERGENT_QUADS_RECEIVED, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_BINDLESS_STATE_CACHE_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_BINDLESS_STATE_CACHE_MISSES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_PRT_NON_RESIDENT_EVENTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_OUTPUT_PIXELS_POINT, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_OUTPUT_PIXELS_BILINEAR, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_OUTPUT_PIXELS_MIP, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_OUTPUT_PIXELS_ANISO, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_OUTPUT_PIXELS_ZERO_LOD, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_FLAG_CACHE_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_FLAG_CACHE_MISSES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_L1_5_L2_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_2D_OUTPUT_PIXELS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_2D_OUTPUT_PIXELS_POINT, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_2D_OUTPUT_PIXELS_BILINEAR, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_2D_FILTER_WORKLOAD_16BIT, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_2D_FILTER_WORKLOAD_32BIT, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter uche_counters[] = {
-       COUNTER(UCHE_PERFCTR_UCHE_SEL_0, RBBM_PERFCTR_UCHE_0_LO, RBBM_PERFCTR_UCHE_0_HI),
-       COUNTER(UCHE_PERFCTR_UCHE_SEL_1, RBBM_PERFCTR_UCHE_1_LO, RBBM_PERFCTR_UCHE_1_HI),
-       COUNTER(UCHE_PERFCTR_UCHE_SEL_2, RBBM_PERFCTR_UCHE_2_LO, RBBM_PERFCTR_UCHE_2_HI),
-       COUNTER(UCHE_PERFCTR_UCHE_SEL_3, RBBM_PERFCTR_UCHE_3_LO, RBBM_PERFCTR_UCHE_3_HI),
-       COUNTER(UCHE_PERFCTR_UCHE_SEL_4, RBBM_PERFCTR_UCHE_4_LO, RBBM_PERFCTR_UCHE_4_HI),
-       COUNTER(UCHE_PERFCTR_UCHE_SEL_5, RBBM_PERFCTR_UCHE_5_LO, RBBM_PERFCTR_UCHE_5_HI),
-       COUNTER(UCHE_PERFCTR_UCHE_SEL_6, RBBM_PERFCTR_UCHE_6_LO, RBBM_PERFCTR_UCHE_6_HI),
-       COUNTER(UCHE_PERFCTR_UCHE_SEL_7, RBBM_PERFCTR_UCHE_7_LO, RBBM_PERFCTR_UCHE_7_HI),
+      COUNTER(UCHE_PERFCTR_UCHE_SEL_0, RBBM_PERFCTR_UCHE_0_LO, RBBM_PERFCTR_UCHE_0_HI),
+      COUNTER(UCHE_PERFCTR_UCHE_SEL_1, RBBM_PERFCTR_UCHE_1_LO, RBBM_PERFCTR_UCHE_1_HI),
+      COUNTER(UCHE_PERFCTR_UCHE_SEL_2, RBBM_PERFCTR_UCHE_2_LO, RBBM_PERFCTR_UCHE_2_HI),
+      COUNTER(UCHE_PERFCTR_UCHE_SEL_3, RBBM_PERFCTR_UCHE_3_LO, RBBM_PERFCTR_UCHE_3_HI),
+      COUNTER(UCHE_PERFCTR_UCHE_SEL_4, RBBM_PERFCTR_UCHE_4_LO, RBBM_PERFCTR_UCHE_4_HI),
+      COUNTER(UCHE_PERFCTR_UCHE_SEL_5, RBBM_PERFCTR_UCHE_5_LO, RBBM_PERFCTR_UCHE_5_HI),
+      COUNTER(UCHE_PERFCTR_UCHE_SEL_6, RBBM_PERFCTR_UCHE_6_LO, RBBM_PERFCTR_UCHE_6_HI),
+      COUNTER(UCHE_PERFCTR_UCHE_SEL_7, RBBM_PERFCTR_UCHE_7_LO, RBBM_PERFCTR_UCHE_7_HI),
 };
 
 static const struct fd_perfcntr_countable uche_countables[] = {
-       COUNTABLE(PERF_UCHE_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_STALL_CYCLES_VBIF, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_VBIF_LATENCY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_VBIF_LATENCY_SAMPLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_TP, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_VFD, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_HLSQ, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_LRZ, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_SP, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_READ_REQUESTS_TP, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_READ_REQUESTS_VFD, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_READ_REQUESTS_HLSQ, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_READ_REQUESTS_LRZ, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_READ_REQUESTS_SP, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_WRITE_REQUESTS_LRZ, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_WRITE_REQUESTS_SP, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_WRITE_REQUESTS_VPC, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_WRITE_REQUESTS_VSC, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_EVICTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_BANK_REQ0, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_BANK_REQ1, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_BANK_REQ2, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_BANK_REQ3, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_BANK_REQ4, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_BANK_REQ5, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_BANK_REQ6, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_BANK_REQ7, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_CH0, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_CH1, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_GMEM_READ_BEATS, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_FLAG_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_STALL_CYCLES_VBIF, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_VBIF_LATENCY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_VBIF_LATENCY_SAMPLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_TP, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_VFD, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_HLSQ, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_LRZ, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_SP, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_READ_REQUESTS_TP, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_READ_REQUESTS_VFD, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_READ_REQUESTS_HLSQ, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_READ_REQUESTS_LRZ, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_READ_REQUESTS_SP, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_WRITE_REQUESTS_LRZ, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_WRITE_REQUESTS_SP, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_WRITE_REQUESTS_VPC, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_WRITE_REQUESTS_VSC, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_EVICTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_BANK_REQ0, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_BANK_REQ1, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_BANK_REQ2, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_BANK_REQ3, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_BANK_REQ4, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_BANK_REQ5, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_BANK_REQ6, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_BANK_REQ7, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_CH0, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_CH1, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_GMEM_READ_BEATS, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_FLAG_COUNT, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter vfd_counters[] = {
-       COUNTER(VFD_PERFCTR_VFD_SEL_0, RBBM_PERFCTR_VFD_0_LO, RBBM_PERFCTR_VFD_0_HI),
-       COUNTER(VFD_PERFCTR_VFD_SEL_1, RBBM_PERFCTR_VFD_1_LO, RBBM_PERFCTR_VFD_1_HI),
-       COUNTER(VFD_PERFCTR_VFD_SEL_2, RBBM_PERFCTR_VFD_2_LO, RBBM_PERFCTR_VFD_2_HI),
-       COUNTER(VFD_PERFCTR_VFD_SEL_3, RBBM_PERFCTR_VFD_3_LO, RBBM_PERFCTR_VFD_3_HI),
-       COUNTER(VFD_PERFCTR_VFD_SEL_4, RBBM_PERFCTR_VFD_4_LO, RBBM_PERFCTR_VFD_4_HI),
-       COUNTER(VFD_PERFCTR_VFD_SEL_5, RBBM_PERFCTR_VFD_5_LO, RBBM_PERFCTR_VFD_5_HI),
-       COUNTER(VFD_PERFCTR_VFD_SEL_6, RBBM_PERFCTR_VFD_6_LO, RBBM_PERFCTR_VFD_6_HI),
-       COUNTER(VFD_PERFCTR_VFD_SEL_7, RBBM_PERFCTR_VFD_7_LO, RBBM_PERFCTR_VFD_7_HI),
+      COUNTER(VFD_PERFCTR_VFD_SEL_0, RBBM_PERFCTR_VFD_0_LO, RBBM_PERFCTR_VFD_0_HI),
+      COUNTER(VFD_PERFCTR_VFD_SEL_1, RBBM_PERFCTR_VFD_1_LO, RBBM_PERFCTR_VFD_1_HI),
+      COUNTER(VFD_PERFCTR_VFD_SEL_2, RBBM_PERFCTR_VFD_2_LO, RBBM_PERFCTR_VFD_2_HI),
+      COUNTER(VFD_PERFCTR_VFD_SEL_3, RBBM_PERFCTR_VFD_3_LO, RBBM_PERFCTR_VFD_3_HI),
+      COUNTER(VFD_PERFCTR_VFD_SEL_4, RBBM_PERFCTR_VFD_4_LO, RBBM_PERFCTR_VFD_4_HI),
+      COUNTER(VFD_PERFCTR_VFD_SEL_5, RBBM_PERFCTR_VFD_5_LO, RBBM_PERFCTR_VFD_5_HI),
+      COUNTER(VFD_PERFCTR_VFD_SEL_6, RBBM_PERFCTR_VFD_6_LO, RBBM_PERFCTR_VFD_6_HI),
+      COUNTER(VFD_PERFCTR_VFD_SEL_7, RBBM_PERFCTR_VFD_7_LO, RBBM_PERFCTR_VFD_7_HI),
 };
 
 static const struct fd_perfcntr_countable vfd_countables[] = {
-       COUNTABLE(PERF_VFD_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_STALL_CYCLES_UCHE, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_STALL_CYCLES_VPC_ALLOC, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_STALL_CYCLES_MISS_VB, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_STALL_CYCLES_MISS_Q, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_STALL_CYCLES_SP_INFO, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_STALL_CYCLES_SP_ATTR, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_STALL_CYCLES_VFDP_VB, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_STALL_CYCLES_VFDP_Q, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_DECODER_PACKER_STALL, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_STARVE_CYCLES_UCHE, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_RBUFFER_FULL, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_ATTR_INFO_FIFO_FULL, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_DECODED_ATTRIBUTE_BYTES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_NUM_ATTRIBUTES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_UPPER_SHADER_FIBERS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_LOWER_SHADER_FIBERS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_MODE_0_FIBERS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_MODE_1_FIBERS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_MODE_2_FIBERS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_MODE_3_FIBERS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_MODE_4_FIBERS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_TOTAL_VERTICES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_NUM_ATTR_MISS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_1_BURST_REQ, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFDP_STALL_CYCLES_VFD, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFDP_STALL_CYCLES_VFD_INDEX, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFDP_STALL_CYCLES_VFD_PROG, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFDP_STARVE_CYCLES_PC, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFDP_VS_STAGE_32_WAVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_STALL_CYCLES_UCHE, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_STALL_CYCLES_VPC_ALLOC, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_STALL_CYCLES_MISS_VB, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_STALL_CYCLES_MISS_Q, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_STALL_CYCLES_SP_INFO, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_STALL_CYCLES_SP_ATTR, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_STALL_CYCLES_VFDP_VB, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_STALL_CYCLES_VFDP_Q, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_DECODER_PACKER_STALL, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_STARVE_CYCLES_UCHE, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_RBUFFER_FULL, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_ATTR_INFO_FIFO_FULL, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_DECODED_ATTRIBUTE_BYTES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_NUM_ATTRIBUTES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_UPPER_SHADER_FIBERS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_LOWER_SHADER_FIBERS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_MODE_0_FIBERS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_MODE_1_FIBERS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_MODE_2_FIBERS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_MODE_3_FIBERS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_MODE_4_FIBERS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_TOTAL_VERTICES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_NUM_ATTR_MISS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_1_BURST_REQ, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFDP_STALL_CYCLES_VFD, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFDP_STALL_CYCLES_VFD_INDEX, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFDP_STALL_CYCLES_VFD_PROG, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFDP_STARVE_CYCLES_PC, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFDP_VS_STAGE_32_WAVES, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter vpc_counters[] = {
-       COUNTER(VPC_PERFCTR_VPC_SEL_0, RBBM_PERFCTR_VPC_0_LO, RBBM_PERFCTR_VPC_0_HI),
-       COUNTER(VPC_PERFCTR_VPC_SEL_1, RBBM_PERFCTR_VPC_1_LO, RBBM_PERFCTR_VPC_1_HI),
-       COUNTER(VPC_PERFCTR_VPC_SEL_2, RBBM_PERFCTR_VPC_2_LO, RBBM_PERFCTR_VPC_2_HI),
-       COUNTER(VPC_PERFCTR_VPC_SEL_3, RBBM_PERFCTR_VPC_3_LO, RBBM_PERFCTR_VPC_3_HI),
+      COUNTER(VPC_PERFCTR_VPC_SEL_0, RBBM_PERFCTR_VPC_0_LO, RBBM_PERFCTR_VPC_0_HI),
+      COUNTER(VPC_PERFCTR_VPC_SEL_1, RBBM_PERFCTR_VPC_1_LO, RBBM_PERFCTR_VPC_1_HI),
+      COUNTER(VPC_PERFCTR_VPC_SEL_2, RBBM_PERFCTR_VPC_2_LO, RBBM_PERFCTR_VPC_2_HI),
+      COUNTER(VPC_PERFCTR_VPC_SEL_3, RBBM_PERFCTR_VPC_3_LO, RBBM_PERFCTR_VPC_3_HI),
 };
 
 static const struct fd_perfcntr_countable vpc_countables[] = {
-       COUNTABLE(PERF_VPC_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_STALL_CYCLES_UCHE, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_STALL_CYCLES_VFD_WACK, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_STALL_CYCLES_HLSQ_PRIM_ALLOC, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_STALL_CYCLES_PC, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_STALL_CYCLES_SP_LM, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_POS_EXPORT_STALL_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_STARVE_CYCLES_SP, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_STARVE_CYCLES_LRZ, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_PC_PRIMITIVES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_SP_COMPONENTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_SP_LM_PRIMITIVES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_SP_LM_COMPONENTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_SP_LM_DWORDS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_STREAMOUT_COMPONENTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_GRANT_PHASES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_STALL_CYCLES_UCHE, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_STALL_CYCLES_VFD_WACK, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_STALL_CYCLES_HLSQ_PRIM_ALLOC, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_STALL_CYCLES_PC, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_STALL_CYCLES_SP_LM, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_POS_EXPORT_STALL_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_STARVE_CYCLES_SP, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_STARVE_CYCLES_LRZ, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_PC_PRIMITIVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_SP_COMPONENTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_SP_LM_PRIMITIVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_SP_LM_COMPONENTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_SP_LM_DWORDS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_STREAMOUT_COMPONENTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_GRANT_PHASES, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter vsc_counters[] = {
-       COUNTER(VSC_PERFCTR_VSC_SEL_0, RBBM_PERFCTR_VSC_0_LO, RBBM_PERFCTR_VSC_0_HI),
-       COUNTER(VSC_PERFCTR_VSC_SEL_1, RBBM_PERFCTR_VSC_1_LO, RBBM_PERFCTR_VSC_1_HI),
+      COUNTER(VSC_PERFCTR_VSC_SEL_0, RBBM_PERFCTR_VSC_0_LO, RBBM_PERFCTR_VSC_0_HI),
+      COUNTER(VSC_PERFCTR_VSC_SEL_1, RBBM_PERFCTR_VSC_1_LO, RBBM_PERFCTR_VSC_1_HI),
 };
 
 static const struct fd_perfcntr_countable vsc_countables[] = {
-       COUNTABLE(PERF_VSC_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VSC_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VSC_STALL_CYCLES_UCHE, UINT64, AVERAGE),
-       COUNTABLE(PERF_VSC_EOT_NUM, UINT64, AVERAGE),
+      COUNTABLE(PERF_VSC_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VSC_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VSC_STALL_CYCLES_UCHE, UINT64, AVERAGE),
+      COUNTABLE(PERF_VSC_EOT_NUM, UINT64, AVERAGE),
 };
 
 /* VBIF counters probably not too useful for userspace, and they make
@@ -620,119 +620,119 @@ static const struct fd_perfcntr_countable vsc_countables[] = {
 #if 0
 /* VBIF counters break the pattern a bit, with enable and clear regs: */
 static const struct fd_perfcntr_counter vbif_counters[] = {
-       COUNTER2(VBIF_PERF_CNT_SEL0, VBIF_PERF_CNT_LOW0, VBIF_PERF_CNT_HIGH0, VBIF_PERF_CNT_EN0, VBIF_PERF_CNT_CLR0),
-       COUNTER2(VBIF_PERF_CNT_SEL1, VBIF_PERF_CNT_LOW1, VBIF_PERF_CNT_HIGH1, VBIF_PERF_CNT_EN1, VBIF_PERF_CNT_CLR1),
-       COUNTER2(VBIF_PERF_CNT_SEL2, VBIF_PERF_CNT_LOW2, VBIF_PERF_CNT_HIGH2, VBIF_PERF_CNT_EN2, VBIF_PERF_CNT_CLR2),
-       COUNTER2(VBIF_PERF_CNT_SEL3, VBIF_PERF_CNT_LOW3, VBIF_PERF_CNT_HIGH3, VBIF_PERF_CNT_EN3, VBIF_PERF_CNT_CLR3),
+      COUNTER2(VBIF_PERF_CNT_SEL0, VBIF_PERF_CNT_LOW0, VBIF_PERF_CNT_HIGH0, VBIF_PERF_CNT_EN0, VBIF_PERF_CNT_CLR0),
+      COUNTER2(VBIF_PERF_CNT_SEL1, VBIF_PERF_CNT_LOW1, VBIF_PERF_CNT_HIGH1, VBIF_PERF_CNT_EN1, VBIF_PERF_CNT_CLR1),
+      COUNTER2(VBIF_PERF_CNT_SEL2, VBIF_PERF_CNT_LOW2, VBIF_PERF_CNT_HIGH2, VBIF_PERF_CNT_EN2, VBIF_PERF_CNT_CLR2),
+      COUNTER2(VBIF_PERF_CNT_SEL3, VBIF_PERF_CNT_LOW3, VBIF_PERF_CNT_HIGH3, VBIF_PERF_CNT_EN3, VBIF_PERF_CNT_CLR3),
 };
 
 static const struct fd_perfcntr_countable vbif_countables[] = {
-       COUNTABLE(AXI_READ_REQUESTS_ID_0, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_1, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_2, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_3, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_4, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_5, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_6, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_7, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_8, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_9, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_10, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_11, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_12, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_13, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_14, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_ID_15, UINT64, AVERAGE),
-       COUNTABLE(AXI0_READ_REQUESTS_TOTAL, UINT64, AVERAGE),
-       COUNTABLE(AXI1_READ_REQUESTS_TOTAL, UINT64, AVERAGE),
-       COUNTABLE(AXI2_READ_REQUESTS_TOTAL, UINT64, AVERAGE),
-       COUNTABLE(AXI3_READ_REQUESTS_TOTAL, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_REQUESTS_TOTAL, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_0, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_1, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_2, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_3, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_4, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_5, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_6, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_7, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_8, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_9, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_10, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_11, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_12, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_13, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_14, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_ID_15, UINT64, AVERAGE),
-       COUNTABLE(AXI0_WRITE_REQUESTS_TOTAL, UINT64, AVERAGE),
-       COUNTABLE(AXI1_WRITE_REQUESTS_TOTAL, UINT64, AVERAGE),
-       COUNTABLE(AXI2_WRITE_REQUESTS_TOTAL, UINT64, AVERAGE),
-       COUNTABLE(AXI3_WRITE_REQUESTS_TOTAL, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_REQUESTS_TOTAL, UINT64, AVERAGE),
-       COUNTABLE(AXI_TOTAL_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_DATA_BEATS_ID_0, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_DATA_BEATS_ID_1, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_DATA_BEATS_ID_2, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_DATA_BEATS_ID_3, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_DATA_BEATS_ID_4, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_DATA_BEATS_ID_5, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_DATA_BEATS_ID_6, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_DATA_BEATS_ID_7, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_DATA_BEATS_ID_8, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_DATA_BEATS_ID_9, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_DATA_BEATS_ID_10, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_DATA_BEATS_ID_11, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_DATA_BEATS_ID_12, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_DATA_BEATS_ID_13, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_DATA_BEATS_ID_14, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_DATA_BEATS_ID_15, UINT64, AVERAGE),
-       COUNTABLE(AXI0_READ_DATA_BEATS_TOTAL, UINT64, AVERAGE),
-       COUNTABLE(AXI1_READ_DATA_BEATS_TOTAL, UINT64, AVERAGE),
-       COUNTABLE(AXI2_READ_DATA_BEATS_TOTAL, UINT64, AVERAGE),
-       COUNTABLE(AXI3_READ_DATA_BEATS_TOTAL, UINT64, AVERAGE),
-       COUNTABLE(AXI_READ_DATA_BEATS_TOTAL, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_DATA_BEATS_ID_0, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_DATA_BEATS_ID_1, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_DATA_BEATS_ID_2, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_DATA_BEATS_ID_3, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_DATA_BEATS_ID_4, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_DATA_BEATS_ID_5, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_DATA_BEATS_ID_6, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_DATA_BEATS_ID_7, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_DATA_BEATS_ID_8, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_DATA_BEATS_ID_9, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_DATA_BEATS_ID_10, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_DATA_BEATS_ID_11, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_DATA_BEATS_ID_12, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_DATA_BEATS_ID_13, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_DATA_BEATS_ID_14, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_DATA_BEATS_ID_15, UINT64, AVERAGE),
-       COUNTABLE(AXI0_WRITE_DATA_BEATS_TOTAL, UINT64, AVERAGE),
-       COUNTABLE(AXI1_WRITE_DATA_BEATS_TOTAL, UINT64, AVERAGE),
-       COUNTABLE(AXI2_WRITE_DATA_BEATS_TOTAL, UINT64, AVERAGE),
-       COUNTABLE(AXI3_WRITE_DATA_BEATS_TOTAL, UINT64, AVERAGE),
-       COUNTABLE(AXI_WRITE_DATA_BEATS_TOTAL, UINT64, AVERAGE),
-       COUNTABLE(AXI_DATA_BEATS_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_0, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_1, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_2, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_3, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_4, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_5, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_6, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_7, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_8, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_9, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_10, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_11, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_12, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_13, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_14, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_ID_15, UINT64, AVERAGE),
+      COUNTABLE(AXI0_READ_REQUESTS_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(AXI1_READ_REQUESTS_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(AXI2_READ_REQUESTS_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(AXI3_READ_REQUESTS_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_REQUESTS_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_0, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_1, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_2, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_3, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_4, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_5, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_6, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_7, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_8, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_9, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_10, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_11, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_12, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_13, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_14, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_ID_15, UINT64, AVERAGE),
+      COUNTABLE(AXI0_WRITE_REQUESTS_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(AXI1_WRITE_REQUESTS_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(AXI2_WRITE_REQUESTS_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(AXI3_WRITE_REQUESTS_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_REQUESTS_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(AXI_TOTAL_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_DATA_BEATS_ID_0, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_DATA_BEATS_ID_1, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_DATA_BEATS_ID_2, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_DATA_BEATS_ID_3, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_DATA_BEATS_ID_4, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_DATA_BEATS_ID_5, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_DATA_BEATS_ID_6, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_DATA_BEATS_ID_7, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_DATA_BEATS_ID_8, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_DATA_BEATS_ID_9, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_DATA_BEATS_ID_10, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_DATA_BEATS_ID_11, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_DATA_BEATS_ID_12, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_DATA_BEATS_ID_13, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_DATA_BEATS_ID_14, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_DATA_BEATS_ID_15, UINT64, AVERAGE),
+      COUNTABLE(AXI0_READ_DATA_BEATS_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(AXI1_READ_DATA_BEATS_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(AXI2_READ_DATA_BEATS_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(AXI3_READ_DATA_BEATS_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(AXI_READ_DATA_BEATS_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_DATA_BEATS_ID_0, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_DATA_BEATS_ID_1, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_DATA_BEATS_ID_2, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_DATA_BEATS_ID_3, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_DATA_BEATS_ID_4, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_DATA_BEATS_ID_5, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_DATA_BEATS_ID_6, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_DATA_BEATS_ID_7, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_DATA_BEATS_ID_8, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_DATA_BEATS_ID_9, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_DATA_BEATS_ID_10, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_DATA_BEATS_ID_11, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_DATA_BEATS_ID_12, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_DATA_BEATS_ID_13, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_DATA_BEATS_ID_14, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_DATA_BEATS_ID_15, UINT64, AVERAGE),
+      COUNTABLE(AXI0_WRITE_DATA_BEATS_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(AXI1_WRITE_DATA_BEATS_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(AXI2_WRITE_DATA_BEATS_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(AXI3_WRITE_DATA_BEATS_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(AXI_WRITE_DATA_BEATS_TOTAL, UINT64, AVERAGE),
+      COUNTABLE(AXI_DATA_BEATS_TOTAL, UINT64, AVERAGE),
 };
 #endif
 
 const struct fd_perfcntr_group a5xx_perfcntr_groups[] = {
-       GROUP("CP", cp_counters, cp_countables),
-       GROUP("CCU", ccu_counters, ccu_countables),
-       GROUP("TSE", tse_counters, tse_countables),
-       GROUP("RAS", ras_counters, ras_countables),
-       GROUP("LRZ", lrz_counters, lrz_countables),
-       GROUP("HLSQ", hlsq_counters, hlsq_countables),
-       GROUP("PC", pc_counters, pc_countables),
-       GROUP("RB", rb_counters, rb_countables),
-       GROUP("RBBM", rbbm_counters, rbbm_countables),
-       GROUP("SP", sp_counters, sp_countables),
-       GROUP("TP", tp_counters, tp_countables),
-       GROUP("UCHE", uche_counters, uche_countables),
-       GROUP("VFD", vfd_counters, vfd_countables),
-       GROUP("VPC", vpc_counters, vpc_countables),
-       GROUP("VSC", vsc_counters, vsc_countables),
-//     GROUP("VBIF", vbif_counters, vbif_countables),
+      GROUP("CP", cp_counters, cp_countables),
+      GROUP("CCU", ccu_counters, ccu_countables),
+      GROUP("TSE", tse_counters, tse_countables),
+      GROUP("RAS", ras_counters, ras_countables),
+      GROUP("LRZ", lrz_counters, lrz_countables),
+      GROUP("HLSQ", hlsq_counters, hlsq_countables),
+      GROUP("PC", pc_counters, pc_countables),
+      GROUP("RB", rb_counters, rb_countables),
+      GROUP("RBBM", rbbm_counters, rbbm_countables),
+      GROUP("SP", sp_counters, sp_countables),
+      GROUP("TP", tp_counters, tp_countables),
+      GROUP("UCHE", uche_counters, uche_countables),
+      GROUP("VFD", vfd_counters, vfd_countables),
+      GROUP("VPC", vpc_counters, vpc_countables),
+      GROUP("VSC", vsc_counters, vsc_countables),
+      //       GROUP("VBIF", vbif_counters, vbif_countables),
 };
 
 const unsigned a5xx_num_perfcntr_groups = ARRAY_SIZE(a5xx_perfcntr_groups);
index 0fc4e7f..ada215e 100644 (file)
 
 static const struct fd_perfcntr_counter cp_counters[] = {
 //RESERVED: for kernel
-//     COUNTER(CP_PERFCTR_CP_SEL(0),  RBBM_PERFCTR_CP(0),  RBBM_PERFCTR_CP(0)+1),
-       COUNTER(CP_PERFCTR_CP_SEL(1),  RBBM_PERFCTR_CP(1),  RBBM_PERFCTR_CP(1)+1),
-       COUNTER(CP_PERFCTR_CP_SEL(2),  RBBM_PERFCTR_CP(2),  RBBM_PERFCTR_CP(2)+1),
-       COUNTER(CP_PERFCTR_CP_SEL(3),  RBBM_PERFCTR_CP(3),  RBBM_PERFCTR_CP(3)+1),
-       COUNTER(CP_PERFCTR_CP_SEL(4),  RBBM_PERFCTR_CP(4),  RBBM_PERFCTR_CP(4)+1),
-       COUNTER(CP_PERFCTR_CP_SEL(5),  RBBM_PERFCTR_CP(5),  RBBM_PERFCTR_CP(5)+1),
-       COUNTER(CP_PERFCTR_CP_SEL(6),  RBBM_PERFCTR_CP(6),  RBBM_PERFCTR_CP(6)+1),
-       COUNTER(CP_PERFCTR_CP_SEL(7),  RBBM_PERFCTR_CP(7),  RBBM_PERFCTR_CP(7)+1),
-       COUNTER(CP_PERFCTR_CP_SEL(8),  RBBM_PERFCTR_CP(8),  RBBM_PERFCTR_CP(8)+1),
-       COUNTER(CP_PERFCTR_CP_SEL(9),  RBBM_PERFCTR_CP(9),  RBBM_PERFCTR_CP(9)+1),
-       COUNTER(CP_PERFCTR_CP_SEL(10), RBBM_PERFCTR_CP(10), RBBM_PERFCTR_CP(10)+1),
-       COUNTER(CP_PERFCTR_CP_SEL(11), RBBM_PERFCTR_CP(11), RBBM_PERFCTR_CP(11)+1),
-       COUNTER(CP_PERFCTR_CP_SEL(12), RBBM_PERFCTR_CP(12), RBBM_PERFCTR_CP(12)+1),
-       COUNTER(CP_PERFCTR_CP_SEL(13), RBBM_PERFCTR_CP(13), RBBM_PERFCTR_CP(13)+1),
+//    COUNTER(CP_PERFCTR_CP_SEL(0),  RBBM_PERFCTR_CP(0),  RBBM_PERFCTR_CP(0)+1),
+      COUNTER(CP_PERFCTR_CP_SEL(1),  RBBM_PERFCTR_CP(1),  RBBM_PERFCTR_CP(1)+1),
+      COUNTER(CP_PERFCTR_CP_SEL(2),  RBBM_PERFCTR_CP(2),  RBBM_PERFCTR_CP(2)+1),
+      COUNTER(CP_PERFCTR_CP_SEL(3),  RBBM_PERFCTR_CP(3),  RBBM_PERFCTR_CP(3)+1),
+      COUNTER(CP_PERFCTR_CP_SEL(4),  RBBM_PERFCTR_CP(4),  RBBM_PERFCTR_CP(4)+1),
+      COUNTER(CP_PERFCTR_CP_SEL(5),  RBBM_PERFCTR_CP(5),  RBBM_PERFCTR_CP(5)+1),
+      COUNTER(CP_PERFCTR_CP_SEL(6),  RBBM_PERFCTR_CP(6),  RBBM_PERFCTR_CP(6)+1),
+      COUNTER(CP_PERFCTR_CP_SEL(7),  RBBM_PERFCTR_CP(7),  RBBM_PERFCTR_CP(7)+1),
+      COUNTER(CP_PERFCTR_CP_SEL(8),  RBBM_PERFCTR_CP(8),  RBBM_PERFCTR_CP(8)+1),
+      COUNTER(CP_PERFCTR_CP_SEL(9),  RBBM_PERFCTR_CP(9),  RBBM_PERFCTR_CP(9)+1),
+      COUNTER(CP_PERFCTR_CP_SEL(10), RBBM_PERFCTR_CP(10), RBBM_PERFCTR_CP(10)+1),
+      COUNTER(CP_PERFCTR_CP_SEL(11), RBBM_PERFCTR_CP(11), RBBM_PERFCTR_CP(11)+1),
+      COUNTER(CP_PERFCTR_CP_SEL(12), RBBM_PERFCTR_CP(12), RBBM_PERFCTR_CP(12)+1),
+      COUNTER(CP_PERFCTR_CP_SEL(13), RBBM_PERFCTR_CP(13), RBBM_PERFCTR_CP(13)+1),
 };
 
 static const struct fd_perfcntr_countable cp_countables[] = {
-       COUNTABLE(PERF_CP_ALWAYS_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_BUSY_GFX_CORE_IDLE, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_NUM_PREEMPTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_PREEMPTION_REACTION_DELAY, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_PREEMPTION_SWITCH_OUT_TIME, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_PREEMPTION_SWITCH_IN_TIME, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_DEAD_DRAWS_IN_BIN_RENDER, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_PREDICATED_DRAWS_KILLED, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_MODE_SWITCH, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_ZPASS_DONE, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_CONTEXT_DONE, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_CACHE_FLUSH, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_LONG_PREEMPTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_SQE_I_CACHE_STARVE, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_SQE_IDLE, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_SQE_PM4_STARVE_RB_IB, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_SQE_PM4_STARVE_SDS, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_SQE_MRB_STARVE, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_SQE_RRB_STARVE, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_SQE_VSD_STARVE, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_VSD_DECODE_STARVE, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_SQE_PIPE_OUT_STALL, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_SQE_SYNC_STALL, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_SQE_PM4_WFI_STALL, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_SQE_SYS_WFI_STALL, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_SQE_T4_EXEC, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_SQE_LOAD_STATE_EXEC, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_SQE_SAVE_SDS_STATE, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_SQE_DRAW_EXEC, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_SQE_CTXT_REG_BUNCH_EXEC, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_SQE_EXEC_PROFILED, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_MEMORY_POOL_EMPTY, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_MEMORY_POOL_SYNC_STALL, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_MEMORY_POOL_ABOVE_THRESH, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_AHB_WR_STALL_PRE_DRAWS, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_AHB_STALL_SQE_GMU, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_AHB_STALL_SQE_WR_OTHER, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_AHB_STALL_SQE_RD_OTHER, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_CLUSTER0_EMPTY, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_CLUSTER1_EMPTY, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_CLUSTER2_EMPTY, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_CLUSTER3_EMPTY, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_CLUSTER4_EMPTY, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_CLUSTER5_EMPTY, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_PM4_DATA, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_PM4_HEADERS, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_VBIF_READ_BEATS, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_VBIF_WRITE_BEATS, UINT64, AVERAGE),
-       COUNTABLE(PERF_CP_SQE_INSTR_COUNTER, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_ALWAYS_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_BUSY_GFX_CORE_IDLE, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_NUM_PREEMPTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_PREEMPTION_REACTION_DELAY, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_PREEMPTION_SWITCH_OUT_TIME, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_PREEMPTION_SWITCH_IN_TIME, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_DEAD_DRAWS_IN_BIN_RENDER, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_PREDICATED_DRAWS_KILLED, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_MODE_SWITCH, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_ZPASS_DONE, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_CONTEXT_DONE, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_CACHE_FLUSH, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_LONG_PREEMPTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_SQE_I_CACHE_STARVE, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_SQE_IDLE, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_SQE_PM4_STARVE_RB_IB, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_SQE_PM4_STARVE_SDS, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_SQE_MRB_STARVE, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_SQE_RRB_STARVE, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_SQE_VSD_STARVE, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_VSD_DECODE_STARVE, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_SQE_PIPE_OUT_STALL, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_SQE_SYNC_STALL, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_SQE_PM4_WFI_STALL, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_SQE_SYS_WFI_STALL, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_SQE_T4_EXEC, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_SQE_LOAD_STATE_EXEC, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_SQE_SAVE_SDS_STATE, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_SQE_DRAW_EXEC, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_SQE_CTXT_REG_BUNCH_EXEC, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_SQE_EXEC_PROFILED, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_MEMORY_POOL_EMPTY, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_MEMORY_POOL_SYNC_STALL, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_MEMORY_POOL_ABOVE_THRESH, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_AHB_WR_STALL_PRE_DRAWS, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_AHB_STALL_SQE_GMU, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_AHB_STALL_SQE_WR_OTHER, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_AHB_STALL_SQE_RD_OTHER, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_CLUSTER0_EMPTY, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_CLUSTER1_EMPTY, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_CLUSTER2_EMPTY, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_CLUSTER3_EMPTY, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_CLUSTER4_EMPTY, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_CLUSTER5_EMPTY, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_PM4_DATA, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_PM4_HEADERS, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_VBIF_READ_BEATS, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_VBIF_WRITE_BEATS, UINT64, AVERAGE),
+      COUNTABLE(PERF_CP_SQE_INSTR_COUNTER, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter ccu_counters[] = {
-       COUNTER(RB_PERFCTR_CCU_SEL(0), RBBM_PERFCTR_CCU(0), RBBM_PERFCTR_CCU(0)+1),
-       COUNTER(RB_PERFCTR_CCU_SEL(1), RBBM_PERFCTR_CCU(1), RBBM_PERFCTR_CCU(1)+1),
-       COUNTER(RB_PERFCTR_CCU_SEL(2), RBBM_PERFCTR_CCU(2), RBBM_PERFCTR_CCU(2)+1),
-       COUNTER(RB_PERFCTR_CCU_SEL(3), RBBM_PERFCTR_CCU(3), RBBM_PERFCTR_CCU(3)+1),
-       COUNTER(RB_PERFCTR_CCU_SEL(4), RBBM_PERFCTR_CCU(4), RBBM_PERFCTR_CCU(4)+1),
+      COUNTER(RB_PERFCTR_CCU_SEL(0), RBBM_PERFCTR_CCU(0), RBBM_PERFCTR_CCU(0)+1),
+      COUNTER(RB_PERFCTR_CCU_SEL(1), RBBM_PERFCTR_CCU(1), RBBM_PERFCTR_CCU(1)+1),
+      COUNTER(RB_PERFCTR_CCU_SEL(2), RBBM_PERFCTR_CCU(2), RBBM_PERFCTR_CCU(2)+1),
+      COUNTER(RB_PERFCTR_CCU_SEL(3), RBBM_PERFCTR_CCU(3), RBBM_PERFCTR_CCU(3)+1),
+      COUNTER(RB_PERFCTR_CCU_SEL(4), RBBM_PERFCTR_CCU(4), RBBM_PERFCTR_CCU(4)+1),
 };
 
 static const struct fd_perfcntr_countable ccu_countables[] = {
-       COUNTABLE(PERF_CCU_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_STALL_CYCLES_RB_DEPTH_RETURN, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_STALL_CYCLES_RB_COLOR_RETURN, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_STARVE_CYCLES_FLAG_RETURN, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_DEPTH_BLOCKS, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_COLOR_BLOCKS, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_DEPTH_BLOCK_HIT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_COLOR_BLOCK_HIT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_PARTIAL_BLOCK_READ, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_GMEM_READ, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_GMEM_WRITE, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_DEPTH_READ_FLAG0_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_DEPTH_READ_FLAG1_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_DEPTH_READ_FLAG2_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_DEPTH_READ_FLAG3_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_DEPTH_READ_FLAG4_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_DEPTH_READ_FLAG5_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_DEPTH_READ_FLAG6_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_DEPTH_READ_FLAG8_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_COLOR_READ_FLAG0_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_COLOR_READ_FLAG1_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_COLOR_READ_FLAG2_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_COLOR_READ_FLAG3_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_COLOR_READ_FLAG4_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_COLOR_READ_FLAG5_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_COLOR_READ_FLAG6_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_COLOR_READ_FLAG8_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_2D_RD_REQ, UINT64, AVERAGE),
-       COUNTABLE(PERF_CCU_2D_WR_REQ, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_STALL_CYCLES_RB_DEPTH_RETURN, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_STALL_CYCLES_RB_COLOR_RETURN, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_STARVE_CYCLES_FLAG_RETURN, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_DEPTH_BLOCKS, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_COLOR_BLOCKS, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_DEPTH_BLOCK_HIT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_COLOR_BLOCK_HIT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_PARTIAL_BLOCK_READ, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_GMEM_READ, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_GMEM_WRITE, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_DEPTH_READ_FLAG0_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_DEPTH_READ_FLAG1_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_DEPTH_READ_FLAG2_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_DEPTH_READ_FLAG3_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_DEPTH_READ_FLAG4_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_DEPTH_READ_FLAG5_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_DEPTH_READ_FLAG6_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_DEPTH_READ_FLAG8_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_COLOR_READ_FLAG0_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_COLOR_READ_FLAG1_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_COLOR_READ_FLAG2_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_COLOR_READ_FLAG3_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_COLOR_READ_FLAG4_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_COLOR_READ_FLAG5_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_COLOR_READ_FLAG6_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_COLOR_READ_FLAG8_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_2D_RD_REQ, UINT64, AVERAGE),
+      COUNTABLE(PERF_CCU_2D_WR_REQ, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter tse_counters[] = {
-       COUNTER(GRAS_PERFCTR_TSE_SEL(0), RBBM_PERFCTR_TSE(0), RBBM_PERFCTR_TSE(0)+1),
-       COUNTER(GRAS_PERFCTR_TSE_SEL(1), RBBM_PERFCTR_TSE(1), RBBM_PERFCTR_TSE(1)+1),
-       COUNTER(GRAS_PERFCTR_TSE_SEL(2), RBBM_PERFCTR_TSE(2), RBBM_PERFCTR_TSE(2)+1),
-       COUNTER(GRAS_PERFCTR_TSE_SEL(3), RBBM_PERFCTR_TSE(3), RBBM_PERFCTR_TSE(3)+1),
+      COUNTER(GRAS_PERFCTR_TSE_SEL(0), RBBM_PERFCTR_TSE(0), RBBM_PERFCTR_TSE(0)+1),
+      COUNTER(GRAS_PERFCTR_TSE_SEL(1), RBBM_PERFCTR_TSE(1), RBBM_PERFCTR_TSE(1)+1),
+      COUNTER(GRAS_PERFCTR_TSE_SEL(2), RBBM_PERFCTR_TSE(2), RBBM_PERFCTR_TSE(2)+1),
+      COUNTER(GRAS_PERFCTR_TSE_SEL(3), RBBM_PERFCTR_TSE(3), RBBM_PERFCTR_TSE(3)+1),
 };
 
 static const struct fd_perfcntr_countable tse_countables[] = {
-       COUNTABLE(PERF_TSE_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_CLIPPING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_STALL_CYCLES_RAS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_STALL_CYCLES_LRZ_BARYPLANE, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_STALL_CYCLES_LRZ_ZPLANE, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_STARVE_CYCLES_PC, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_INPUT_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_INPUT_NULL_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_TRIVAL_REJ_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_CLIPPED_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_ZERO_AREA_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_FACENESS_CULLED_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_ZERO_PIXEL_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_OUTPUT_NULL_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_OUTPUT_VISIBLE_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_CINVOCATION, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_CPRIMITIVES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_2D_INPUT_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_2D_ALIVE_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TSE_CLIP_PLANES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_CLIPPING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_STALL_CYCLES_RAS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_STALL_CYCLES_LRZ_BARYPLANE, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_STALL_CYCLES_LRZ_ZPLANE, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_STARVE_CYCLES_PC, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_INPUT_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_INPUT_NULL_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_TRIVAL_REJ_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_CLIPPED_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_ZERO_AREA_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_FACENESS_CULLED_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_ZERO_PIXEL_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_OUTPUT_NULL_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_OUTPUT_VISIBLE_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_CINVOCATION, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_CPRIMITIVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_2D_INPUT_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_2D_ALIVE_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TSE_CLIP_PLANES, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter ras_counters[] = {
-       COUNTER(GRAS_PERFCTR_RAS_SEL(0), RBBM_PERFCTR_RAS(0), RBBM_PERFCTR_RAS(0)+1),
-       COUNTER(GRAS_PERFCTR_RAS_SEL(1), RBBM_PERFCTR_RAS(1), RBBM_PERFCTR_RAS(1)+1),
-       COUNTER(GRAS_PERFCTR_RAS_SEL(2), RBBM_PERFCTR_RAS(2), RBBM_PERFCTR_RAS(2)+1),
-       COUNTER(GRAS_PERFCTR_RAS_SEL(3), RBBM_PERFCTR_RAS(3), RBBM_PERFCTR_RAS(3)+1),
+      COUNTER(GRAS_PERFCTR_RAS_SEL(0), RBBM_PERFCTR_RAS(0), RBBM_PERFCTR_RAS(0)+1),
+      COUNTER(GRAS_PERFCTR_RAS_SEL(1), RBBM_PERFCTR_RAS(1), RBBM_PERFCTR_RAS(1)+1),
+      COUNTER(GRAS_PERFCTR_RAS_SEL(2), RBBM_PERFCTR_RAS(2), RBBM_PERFCTR_RAS(2)+1),
+      COUNTER(GRAS_PERFCTR_RAS_SEL(3), RBBM_PERFCTR_RAS(3), RBBM_PERFCTR_RAS(3)+1),
 };
 
 static const struct fd_perfcntr_countable ras_countables[] = {
-       COUNTABLE(PERF_RAS_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RAS_SUPERTILE_ACTIVE_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RAS_STALL_CYCLES_LRZ, UINT64, AVERAGE),
-       COUNTABLE(PERF_RAS_STARVE_CYCLES_TSE, UINT64, AVERAGE),
-       COUNTABLE(PERF_RAS_SUPER_TILES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RAS_8X4_TILES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RAS_MASKGEN_ACTIVE, UINT64, AVERAGE),
-       COUNTABLE(PERF_RAS_FULLY_COVERED_SUPER_TILES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RAS_FULLY_COVERED_8X4_TILES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RAS_PRIM_KILLED_INVISILBE, UINT64, AVERAGE),
-       COUNTABLE(PERF_RAS_SUPERTILE_GEN_ACTIVE_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RAS_LRZ_INTF_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RAS_BLOCKS, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_SUPERTILE_ACTIVE_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_STALL_CYCLES_LRZ, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_STARVE_CYCLES_TSE, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_SUPER_TILES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_8X4_TILES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_MASKGEN_ACTIVE, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_FULLY_COVERED_SUPER_TILES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_FULLY_COVERED_8X4_TILES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_PRIM_KILLED_INVISILBE, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_SUPERTILE_GEN_ACTIVE_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_LRZ_INTF_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RAS_BLOCKS, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter lrz_counters[] = {
-       COUNTER(GRAS_PERFCTR_LRZ_SEL(0), RBBM_PERFCTR_LRZ(0), RBBM_PERFCTR_LRZ(0)+1),
-       COUNTER(GRAS_PERFCTR_LRZ_SEL(1), RBBM_PERFCTR_LRZ(1), RBBM_PERFCTR_LRZ(1)+1),
-       COUNTER(GRAS_PERFCTR_LRZ_SEL(2), RBBM_PERFCTR_LRZ(2), RBBM_PERFCTR_LRZ(2)+1),
-       COUNTER(GRAS_PERFCTR_LRZ_SEL(3), RBBM_PERFCTR_LRZ(3), RBBM_PERFCTR_LRZ(3)+1),
+      COUNTER(GRAS_PERFCTR_LRZ_SEL(0), RBBM_PERFCTR_LRZ(0), RBBM_PERFCTR_LRZ(0)+1),
+      COUNTER(GRAS_PERFCTR_LRZ_SEL(1), RBBM_PERFCTR_LRZ(1), RBBM_PERFCTR_LRZ(1)+1),
+      COUNTER(GRAS_PERFCTR_LRZ_SEL(2), RBBM_PERFCTR_LRZ(2), RBBM_PERFCTR_LRZ(2)+1),
+      COUNTER(GRAS_PERFCTR_LRZ_SEL(3), RBBM_PERFCTR_LRZ(3), RBBM_PERFCTR_LRZ(3)+1),
 };
 
 static const struct fd_perfcntr_countable lrz_countables[] = {
-       COUNTABLE(PERF_LRZ_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_STARVE_CYCLES_RAS, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_STALL_CYCLES_RB, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_STALL_CYCLES_VSC, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_STALL_CYCLES_VPC, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_STALL_CYCLES_FLAG_PREFETCH, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_STALL_CYCLES_UCHE, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_LRZ_READ, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_LRZ_WRITE, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_READ_LATENCY, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_MERGE_CACHE_UPDATING, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_PRIM_KILLED_BY_MASKGEN, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_PRIM_KILLED_BY_LRZ, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_VISIBLE_PRIM_AFTER_LRZ, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_FULL_8X8_TILES, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_PARTIAL_8X8_TILES, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_TILE_KILLED, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_TOTAL_PIXEL, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_VISIBLE_PIXEL_AFTER_LRZ, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_FULLY_COVERED_TILES, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_PARTIAL_COVERED_TILES, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_FEEDBACK_ACCEPT, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_FEEDBACK_DISCARD, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_FEEDBACK_STALL, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_STALL_CYCLES_RB_ZPLANE, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_STALL_CYCLES_RB_BPLANE, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_STALL_CYCLES_VC, UINT64, AVERAGE),
-       COUNTABLE(PERF_LRZ_RAS_MASK_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_STARVE_CYCLES_RAS, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_STALL_CYCLES_RB, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_STALL_CYCLES_VSC, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_STALL_CYCLES_VPC, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_STALL_CYCLES_FLAG_PREFETCH, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_STALL_CYCLES_UCHE, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_LRZ_READ, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_LRZ_WRITE, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_READ_LATENCY, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_MERGE_CACHE_UPDATING, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_PRIM_KILLED_BY_MASKGEN, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_PRIM_KILLED_BY_LRZ, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_VISIBLE_PRIM_AFTER_LRZ, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_FULL_8X8_TILES, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_PARTIAL_8X8_TILES, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_TILE_KILLED, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_TOTAL_PIXEL, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_VISIBLE_PIXEL_AFTER_LRZ, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_FULLY_COVERED_TILES, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_PARTIAL_COVERED_TILES, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_FEEDBACK_ACCEPT, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_FEEDBACK_DISCARD, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_FEEDBACK_STALL, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_STALL_CYCLES_RB_ZPLANE, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_STALL_CYCLES_RB_BPLANE, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_STALL_CYCLES_VC, UINT64, AVERAGE),
+      COUNTABLE(PERF_LRZ_RAS_MASK_TRANS, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter hlsq_counters[] = {
-       COUNTER(HLSQ_PERFCTR_HLSQ_SEL(0), RBBM_PERFCTR_HLSQ(0), RBBM_PERFCTR_HLSQ(0)+1),
-       COUNTER(HLSQ_PERFCTR_HLSQ_SEL(1), RBBM_PERFCTR_HLSQ(1), RBBM_PERFCTR_HLSQ(1)+1),
-       COUNTER(HLSQ_PERFCTR_HLSQ_SEL(2), RBBM_PERFCTR_HLSQ(2), RBBM_PERFCTR_HLSQ(2)+1),
-       COUNTER(HLSQ_PERFCTR_HLSQ_SEL(3), RBBM_PERFCTR_HLSQ(3), RBBM_PERFCTR_HLSQ(3)+1),
-       COUNTER(HLSQ_PERFCTR_HLSQ_SEL(4), RBBM_PERFCTR_HLSQ(4), RBBM_PERFCTR_HLSQ(4)+1),
-       COUNTER(HLSQ_PERFCTR_HLSQ_SEL(5), RBBM_PERFCTR_HLSQ(5), RBBM_PERFCTR_HLSQ(5)+1),
-// TODO did we loose some HLSQ counters or are they just missing from xml
-//     COUNTER(HLSQ_PERFCTR_HLSQ_SEL(6), RBBM_PERFCTR_HLSQ(6), RBBM_PERFCTR_HLSQ(6)+1),
-//     COUNTER(HLSQ_PERFCTR_HLSQ_SEL(7), RBBM_PERFCTR_HLSQ(7), RBBM_PERFCTR_HLSQ(7)+1),
+      COUNTER(HLSQ_PERFCTR_HLSQ_SEL(0), RBBM_PERFCTR_HLSQ(0), RBBM_PERFCTR_HLSQ(0)+1),
+      COUNTER(HLSQ_PERFCTR_HLSQ_SEL(1), RBBM_PERFCTR_HLSQ(1), RBBM_PERFCTR_HLSQ(1)+1),
+      COUNTER(HLSQ_PERFCTR_HLSQ_SEL(2), RBBM_PERFCTR_HLSQ(2), RBBM_PERFCTR_HLSQ(2)+1),
+      COUNTER(HLSQ_PERFCTR_HLSQ_SEL(3), RBBM_PERFCTR_HLSQ(3), RBBM_PERFCTR_HLSQ(3)+1),
+      COUNTER(HLSQ_PERFCTR_HLSQ_SEL(4), RBBM_PERFCTR_HLSQ(4), RBBM_PERFCTR_HLSQ(4)+1),
+      COUNTER(HLSQ_PERFCTR_HLSQ_SEL(5), RBBM_PERFCTR_HLSQ(5), RBBM_PERFCTR_HLSQ(5)+1),
+      // TODO did we lose some HLSQ counters or are they just missing from xml
+      //       COUNTER(HLSQ_PERFCTR_HLSQ_SEL(6), RBBM_PERFCTR_HLSQ(6), RBBM_PERFCTR_HLSQ(6)+1),
+      //       COUNTER(HLSQ_PERFCTR_HLSQ_SEL(7), RBBM_PERFCTR_HLSQ(7), RBBM_PERFCTR_HLSQ(7)+1),
 };
 
 static const struct fd_perfcntr_countable hlsq_countables[] = {
-       COUNTABLE(PERF_HLSQ_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_STALL_CYCLES_UCHE, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_STALL_CYCLES_SP_STATE, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_STALL_CYCLES_SP_FS_STAGE, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_UCHE_LATENCY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_UCHE_LATENCY_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_FS_STAGE_1X_WAVES, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_FS_STAGE_2X_WAVES, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_QUADS, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_CS_INVOCATIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_COMPUTE_DRAWCALLS, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_FS_DATA_WAIT_PROGRAMMING, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_DUAL_FS_PROG_ACTIVE, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_DUAL_VS_PROG_ACTIVE, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_FS_BATCH_COUNT_ZERO, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_VS_BATCH_COUNT_ZERO, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_WAVE_PENDING_NO_QUAD, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_WAVE_PENDING_NO_PRIM_BASE, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_STALL_CYCLES_VPC, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_PIXELS, UINT64, AVERAGE),
-       COUNTABLE(PERF_HLSQ_DRAW_MODE_SWITCH_VSFS_SYNC, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_STALL_CYCLES_UCHE, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_STALL_CYCLES_SP_STATE, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_STALL_CYCLES_SP_FS_STAGE, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_UCHE_LATENCY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_UCHE_LATENCY_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_FS_STAGE_1X_WAVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_FS_STAGE_2X_WAVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_QUADS, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_CS_INVOCATIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_COMPUTE_DRAWCALLS, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_FS_DATA_WAIT_PROGRAMMING, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_DUAL_FS_PROG_ACTIVE, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_DUAL_VS_PROG_ACTIVE, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_FS_BATCH_COUNT_ZERO, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_VS_BATCH_COUNT_ZERO, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_WAVE_PENDING_NO_QUAD, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_WAVE_PENDING_NO_PRIM_BASE, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_STALL_CYCLES_VPC, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_PIXELS, UINT64, AVERAGE),
+      COUNTABLE(PERF_HLSQ_DRAW_MODE_SWITCH_VSFS_SYNC, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter pc_counters[] = {
-       COUNTER(PC_PERFCTR_PC_SEL(0), RBBM_PERFCTR_PC(0), RBBM_PERFCTR_PC(0)+1),
-       COUNTER(PC_PERFCTR_PC_SEL(1), RBBM_PERFCTR_PC(1), RBBM_PERFCTR_PC(1)+1),
-       COUNTER(PC_PERFCTR_PC_SEL(2), RBBM_PERFCTR_PC(2), RBBM_PERFCTR_PC(2)+1),
-       COUNTER(PC_PERFCTR_PC_SEL(3), RBBM_PERFCTR_PC(3), RBBM_PERFCTR_PC(3)+1),
-       COUNTER(PC_PERFCTR_PC_SEL(4), RBBM_PERFCTR_PC(4), RBBM_PERFCTR_PC(4)+1),
-       COUNTER(PC_PERFCTR_PC_SEL(5), RBBM_PERFCTR_PC(5), RBBM_PERFCTR_PC(5)+1),
-       COUNTER(PC_PERFCTR_PC_SEL(6), RBBM_PERFCTR_PC(6), RBBM_PERFCTR_PC(6)+1),
-       COUNTER(PC_PERFCTR_PC_SEL(7), RBBM_PERFCTR_PC(7), RBBM_PERFCTR_PC(7)+1),
+      COUNTER(PC_PERFCTR_PC_SEL(0), RBBM_PERFCTR_PC(0), RBBM_PERFCTR_PC(0)+1),
+      COUNTER(PC_PERFCTR_PC_SEL(1), RBBM_PERFCTR_PC(1), RBBM_PERFCTR_PC(1)+1),
+      COUNTER(PC_PERFCTR_PC_SEL(2), RBBM_PERFCTR_PC(2), RBBM_PERFCTR_PC(2)+1),
+      COUNTER(PC_PERFCTR_PC_SEL(3), RBBM_PERFCTR_PC(3), RBBM_PERFCTR_PC(3)+1),
+      COUNTER(PC_PERFCTR_PC_SEL(4), RBBM_PERFCTR_PC(4), RBBM_PERFCTR_PC(4)+1),
+      COUNTER(PC_PERFCTR_PC_SEL(5), RBBM_PERFCTR_PC(5), RBBM_PERFCTR_PC(5)+1),
+      COUNTER(PC_PERFCTR_PC_SEL(6), RBBM_PERFCTR_PC(6), RBBM_PERFCTR_PC(6)+1),
+      COUNTER(PC_PERFCTR_PC_SEL(7), RBBM_PERFCTR_PC(7), RBBM_PERFCTR_PC(7)+1),
 };
 
 static const struct fd_perfcntr_countable pc_countables[] = {
-       COUNTABLE(PERF_PC_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STALL_CYCLES_VFD, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STALL_CYCLES_TSE, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STALL_CYCLES_VPC, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STALL_CYCLES_UCHE, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STALL_CYCLES_TESS, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STALL_CYCLES_TSE_ONLY, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STALL_CYCLES_VPC_ONLY, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_PASS1_TF_STALL_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_INDEX, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_TESS_FACTOR, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_VIZ_STREAM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_POSITION, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_STARVE_CYCLES_DI, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_VIS_STREAMS_LOADED, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_INSTANCES, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_VPC_PRIMITIVES, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_DEAD_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_LIVE_PRIM, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_VERTEX_HITS, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_IA_VERTICES, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_IA_PRIMITIVES, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_GS_PRIMITIVES, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_HS_INVOCATIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_DS_INVOCATIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_VS_INVOCATIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_GS_INVOCATIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_DS_PRIMITIVES, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_VPC_POS_DATA_TRANSACTION, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_3D_DRAWCALLS, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_2D_DRAWCALLS, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_NON_DRAWCALL_GLOBAL_EVENTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TESS_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TESS_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TESS_STALL_CYCLES_PC, UINT64, AVERAGE),
-       COUNTABLE(PERF_TESS_STARVE_CYCLES_PC, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_TSE_TRANSACTION, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_TSE_VERTEX, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_TESS_PC_UV_TRANS, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_TESS_PC_UV_PATCHES, UINT64, AVERAGE),
-       COUNTABLE(PERF_PC_TESS_FACTOR_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STALL_CYCLES_VFD, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STALL_CYCLES_TSE, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STALL_CYCLES_VPC, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STALL_CYCLES_UCHE, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STALL_CYCLES_TESS, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STALL_CYCLES_TSE_ONLY, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STALL_CYCLES_VPC_ONLY, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_PASS1_TF_STALL_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_INDEX, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_TESS_FACTOR, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_VIZ_STREAM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_POSITION, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_STARVE_CYCLES_DI, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_VIS_STREAMS_LOADED, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_INSTANCES, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_VPC_PRIMITIVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_DEAD_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_LIVE_PRIM, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_VERTEX_HITS, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_IA_VERTICES, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_IA_PRIMITIVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_GS_PRIMITIVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_HS_INVOCATIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_DS_INVOCATIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_VS_INVOCATIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_GS_INVOCATIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_DS_PRIMITIVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_VPC_POS_DATA_TRANSACTION, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_3D_DRAWCALLS, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_2D_DRAWCALLS, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_NON_DRAWCALL_GLOBAL_EVENTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TESS_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TESS_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TESS_STALL_CYCLES_PC, UINT64, AVERAGE),
+      COUNTABLE(PERF_TESS_STARVE_CYCLES_PC, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_TSE_TRANSACTION, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_TSE_VERTEX, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_TESS_PC_UV_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_TESS_PC_UV_PATCHES, UINT64, AVERAGE),
+      COUNTABLE(PERF_PC_TESS_FACTOR_TRANS, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter rb_counters[] = {
-       COUNTER(RB_PERFCTR_RB_SEL(0), RBBM_PERFCTR_RB(0), RBBM_PERFCTR_RB(0)+1),
-       COUNTER(RB_PERFCTR_RB_SEL(1), RBBM_PERFCTR_RB(1), RBBM_PERFCTR_RB(1)+1),
-       COUNTER(RB_PERFCTR_RB_SEL(2), RBBM_PERFCTR_RB(2), RBBM_PERFCTR_RB(2)+1),
-       COUNTER(RB_PERFCTR_RB_SEL(3), RBBM_PERFCTR_RB(3), RBBM_PERFCTR_RB(3)+1),
-       COUNTER(RB_PERFCTR_RB_SEL(4), RBBM_PERFCTR_RB(4), RBBM_PERFCTR_RB(4)+1),
-       COUNTER(RB_PERFCTR_RB_SEL(5), RBBM_PERFCTR_RB(5), RBBM_PERFCTR_RB(5)+1),
-       COUNTER(RB_PERFCTR_RB_SEL(6), RBBM_PERFCTR_RB(6), RBBM_PERFCTR_RB(6)+1),
-       COUNTER(RB_PERFCTR_RB_SEL(7), RBBM_PERFCTR_RB(7), RBBM_PERFCTR_RB(7)+1),
+      COUNTER(RB_PERFCTR_RB_SEL(0), RBBM_PERFCTR_RB(0), RBBM_PERFCTR_RB(0)+1),
+      COUNTER(RB_PERFCTR_RB_SEL(1), RBBM_PERFCTR_RB(1), RBBM_PERFCTR_RB(1)+1),
+      COUNTER(RB_PERFCTR_RB_SEL(2), RBBM_PERFCTR_RB(2), RBBM_PERFCTR_RB(2)+1),
+      COUNTER(RB_PERFCTR_RB_SEL(3), RBBM_PERFCTR_RB(3), RBBM_PERFCTR_RB(3)+1),
+      COUNTER(RB_PERFCTR_RB_SEL(4), RBBM_PERFCTR_RB(4), RBBM_PERFCTR_RB(4)+1),
+      COUNTER(RB_PERFCTR_RB_SEL(5), RBBM_PERFCTR_RB(5), RBBM_PERFCTR_RB(5)+1),
+      COUNTER(RB_PERFCTR_RB_SEL(6), RBBM_PERFCTR_RB(6), RBBM_PERFCTR_RB(6)+1),
+      COUNTER(RB_PERFCTR_RB_SEL(7), RBBM_PERFCTR_RB(7), RBBM_PERFCTR_RB(7)+1),
 };
 
 static const struct fd_perfcntr_countable rb_countables[] = {
-       COUNTABLE(PERF_RB_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STALL_CYCLES_HLSQ, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STALL_CYCLES_FIFO0_FULL, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STALL_CYCLES_FIFO1_FULL, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STALL_CYCLES_FIFO2_FULL, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STARVE_CYCLES_SP, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STARVE_CYCLES_LRZ_TILE, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STARVE_CYCLES_CCU, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STARVE_CYCLES_Z_PLANE, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STARVE_CYCLES_BARY_PLANE, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_Z_WORKLOAD, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_HLSQ_ACTIVE, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_Z_READ, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_Z_WRITE, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_C_READ, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_C_WRITE, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_TOTAL_PASS, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_Z_PASS, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_Z_FAIL, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_S_FAIL, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_BLENDED_FXP_COMPONENTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_BLENDED_FP16_COMPONENTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_PS_INVOCATIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_2D_ALIVE_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_2D_STALL_CYCLES_A2D, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_2D_STARVE_CYCLES_SRC, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_2D_STARVE_CYCLES_SP, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_2D_STARVE_CYCLES_DST, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_2D_VALID_PIXELS, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_3D_PIXELS, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_BLENDER_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_ZPROC_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_CPROC_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_SAMPLER_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STALL_CYCLES_CCU_COLOR_READ, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STALL_CYCLES_CCU_COLOR_WRITE, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STALL_CYCLES_CCU_DEPTH_READ, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STALL_CYCLES_CCU_DEPTH_WRITE, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STALL_CYCLES_VPC, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_2D_INPUT_TRANS, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_2D_OUTPUT_RB_DST_TRANS, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_2D_OUTPUT_RB_SRC_TRANS, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_BLENDED_FP32_COMPONENTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_COLOR_PIX_TILES, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_STALL_CYCLES_CCU, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_EARLY_Z_ARB3_GRANT, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_LATE_Z_ARB3_GRANT, UINT64, AVERAGE),
-       COUNTABLE(PERF_RB_EARLY_Z_SKIP_GRANT, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STALL_CYCLES_HLSQ, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STALL_CYCLES_FIFO0_FULL, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STALL_CYCLES_FIFO1_FULL, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STALL_CYCLES_FIFO2_FULL, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STARVE_CYCLES_SP, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STARVE_CYCLES_LRZ_TILE, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STARVE_CYCLES_CCU, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STARVE_CYCLES_Z_PLANE, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STARVE_CYCLES_BARY_PLANE, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_Z_WORKLOAD, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_HLSQ_ACTIVE, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_Z_READ, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_Z_WRITE, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_C_READ, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_C_WRITE, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_TOTAL_PASS, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_Z_PASS, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_Z_FAIL, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_S_FAIL, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_BLENDED_FXP_COMPONENTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_BLENDED_FP16_COMPONENTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_PS_INVOCATIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_2D_ALIVE_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_2D_STALL_CYCLES_A2D, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_2D_STARVE_CYCLES_SRC, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_2D_STARVE_CYCLES_SP, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_2D_STARVE_CYCLES_DST, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_2D_VALID_PIXELS, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_3D_PIXELS, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_BLENDER_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_ZPROC_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_CPROC_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_SAMPLER_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STALL_CYCLES_CCU_COLOR_READ, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STALL_CYCLES_CCU_COLOR_WRITE, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STALL_CYCLES_CCU_DEPTH_READ, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STALL_CYCLES_CCU_DEPTH_WRITE, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STALL_CYCLES_VPC, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_2D_INPUT_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_2D_OUTPUT_RB_DST_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_2D_OUTPUT_RB_SRC_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_BLENDED_FP32_COMPONENTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_COLOR_PIX_TILES, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_STALL_CYCLES_CCU, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_EARLY_Z_ARB3_GRANT, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_LATE_Z_ARB3_GRANT, UINT64, AVERAGE),
+      COUNTABLE(PERF_RB_EARLY_Z_SKIP_GRANT, UINT64, AVERAGE),
 };
 
 UNUSED static const struct fd_perfcntr_counter rbbm_counters[] = {
-//RESERVED: for kernel
-//     COUNTER(RBBM_PERFCTR_RBBM_SEL(0), RBBM_PERFCTR_RBBM(0), RBBM_PERFCTR_RBBM(0)+1),
-       COUNTER(RBBM_PERFCTR_RBBM_SEL(1), RBBM_PERFCTR_RBBM(1), RBBM_PERFCTR_RBBM(1)+1),
-       COUNTER(RBBM_PERFCTR_RBBM_SEL(2), RBBM_PERFCTR_RBBM(2), RBBM_PERFCTR_RBBM(2)+1),
-       COUNTER(RBBM_PERFCTR_RBBM_SEL(3), RBBM_PERFCTR_RBBM(3), RBBM_PERFCTR_RBBM(3)+1),
+      //RESERVED: for kernel
+      //       COUNTER(RBBM_PERFCTR_RBBM_SEL(0), RBBM_PERFCTR_RBBM(0), RBBM_PERFCTR_RBBM(0)+1),
+      COUNTER(RBBM_PERFCTR_RBBM_SEL(1), RBBM_PERFCTR_RBBM(1), RBBM_PERFCTR_RBBM(1)+1),
+      COUNTER(RBBM_PERFCTR_RBBM_SEL(2), RBBM_PERFCTR_RBBM(2), RBBM_PERFCTR_RBBM(2)+1),
+      COUNTER(RBBM_PERFCTR_RBBM_SEL(3), RBBM_PERFCTR_RBBM(3), RBBM_PERFCTR_RBBM(3)+1),
 };
 
 UNUSED static const struct fd_perfcntr_countable rbbm_countables[] = {
-       COUNTABLE(PERF_RBBM_ALWAYS_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_ALWAYS_ON, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_TSE_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_RAS_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_PC_DCALL_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_PC_VSD_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_STATUS_MASKED, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_COM_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_DCOM_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_VBIF_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_VSC_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_TESS_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_UCHE_BUSY, UINT64, AVERAGE),
-       COUNTABLE(PERF_RBBM_HLSQ_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_ALWAYS_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_ALWAYS_ON, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_TSE_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_RAS_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_PC_DCALL_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_PC_VSD_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_STATUS_MASKED, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_COM_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_DCOM_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_VBIF_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_VSC_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_TESS_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_UCHE_BUSY, UINT64, AVERAGE),
+      COUNTABLE(PERF_RBBM_HLSQ_BUSY, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter sp_counters[] = {
-//RESERVED: for kernel
-//     COUNTER(SP_PERFCTR_SP_SEL(0),  RBBM_PERFCTR_SP(0),  RBBM_PERFCTR_SP(0)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(1),  RBBM_PERFCTR_SP(1),  RBBM_PERFCTR_SP(1)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(2),  RBBM_PERFCTR_SP(2),  RBBM_PERFCTR_SP(2)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(3),  RBBM_PERFCTR_SP(3),  RBBM_PERFCTR_SP(3)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(4),  RBBM_PERFCTR_SP(4),  RBBM_PERFCTR_SP(4)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(5),  RBBM_PERFCTR_SP(5),  RBBM_PERFCTR_SP(5)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(6),  RBBM_PERFCTR_SP(6),  RBBM_PERFCTR_SP(6)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(7),  RBBM_PERFCTR_SP(7),  RBBM_PERFCTR_SP(7)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(8),  RBBM_PERFCTR_SP(8),  RBBM_PERFCTR_SP(8)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(9),  RBBM_PERFCTR_SP(9),  RBBM_PERFCTR_SP(9)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(10), RBBM_PERFCTR_SP(10), RBBM_PERFCTR_SP(10)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(11), RBBM_PERFCTR_SP(11), RBBM_PERFCTR_SP(11)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(12), RBBM_PERFCTR_SP(12), RBBM_PERFCTR_SP(12)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(13), RBBM_PERFCTR_SP(13), RBBM_PERFCTR_SP(13)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(14), RBBM_PERFCTR_SP(14), RBBM_PERFCTR_SP(14)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(15), RBBM_PERFCTR_SP(15), RBBM_PERFCTR_SP(15)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(16), RBBM_PERFCTR_SP(16), RBBM_PERFCTR_SP(16)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(17), RBBM_PERFCTR_SP(17), RBBM_PERFCTR_SP(17)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(18), RBBM_PERFCTR_SP(18), RBBM_PERFCTR_SP(18)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(19), RBBM_PERFCTR_SP(19), RBBM_PERFCTR_SP(19)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(20), RBBM_PERFCTR_SP(20), RBBM_PERFCTR_SP(20)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(21), RBBM_PERFCTR_SP(21), RBBM_PERFCTR_SP(21)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(22), RBBM_PERFCTR_SP(22), RBBM_PERFCTR_SP(22)+1),
-       COUNTER(SP_PERFCTR_SP_SEL(23), RBBM_PERFCTR_SP(23), RBBM_PERFCTR_SP(23)+1),
+      //RESERVED: for kernel
+      //       COUNTER(SP_PERFCTR_SP_SEL(0),  RBBM_PERFCTR_SP(0),  RBBM_PERFCTR_SP(0)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(1),  RBBM_PERFCTR_SP(1),  RBBM_PERFCTR_SP(1)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(2),  RBBM_PERFCTR_SP(2),  RBBM_PERFCTR_SP(2)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(3),  RBBM_PERFCTR_SP(3),  RBBM_PERFCTR_SP(3)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(4),  RBBM_PERFCTR_SP(4),  RBBM_PERFCTR_SP(4)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(5),  RBBM_PERFCTR_SP(5),  RBBM_PERFCTR_SP(5)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(6),  RBBM_PERFCTR_SP(6),  RBBM_PERFCTR_SP(6)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(7),  RBBM_PERFCTR_SP(7),  RBBM_PERFCTR_SP(7)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(8),  RBBM_PERFCTR_SP(8),  RBBM_PERFCTR_SP(8)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(9),  RBBM_PERFCTR_SP(9),  RBBM_PERFCTR_SP(9)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(10), RBBM_PERFCTR_SP(10), RBBM_PERFCTR_SP(10)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(11), RBBM_PERFCTR_SP(11), RBBM_PERFCTR_SP(11)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(12), RBBM_PERFCTR_SP(12), RBBM_PERFCTR_SP(12)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(13), RBBM_PERFCTR_SP(13), RBBM_PERFCTR_SP(13)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(14), RBBM_PERFCTR_SP(14), RBBM_PERFCTR_SP(14)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(15), RBBM_PERFCTR_SP(15), RBBM_PERFCTR_SP(15)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(16), RBBM_PERFCTR_SP(16), RBBM_PERFCTR_SP(16)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(17), RBBM_PERFCTR_SP(17), RBBM_PERFCTR_SP(17)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(18), RBBM_PERFCTR_SP(18), RBBM_PERFCTR_SP(18)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(19), RBBM_PERFCTR_SP(19), RBBM_PERFCTR_SP(19)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(20), RBBM_PERFCTR_SP(20), RBBM_PERFCTR_SP(20)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(21), RBBM_PERFCTR_SP(21), RBBM_PERFCTR_SP(21)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(22), RBBM_PERFCTR_SP(22), RBBM_PERFCTR_SP(22)+1),
+      COUNTER(SP_PERFCTR_SP_SEL(23), RBBM_PERFCTR_SP(23), RBBM_PERFCTR_SP(23)+1),
 };
 
 static const struct fd_perfcntr_countable sp_countables[] = {
-       COUNTABLE(PERF_SP_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_ALU_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_EFU_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_STALL_CYCLES_VPC, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_STALL_CYCLES_TP, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_STALL_CYCLES_UCHE, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_STALL_CYCLES_RB, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_NON_EXECUTION_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_CONTEXTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_CONTEXT_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FS_STAGE_WAVE_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FS_STAGE_WAVE_SAMPLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_VS_STAGE_WAVE_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_VS_STAGE_WAVE_SAMPLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FS_STAGE_DURATION_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_VS_STAGE_DURATION_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_CTRL_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_LOAD_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_EMIT_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_NOP_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_WAIT_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_FETCH_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_IDLE_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_END_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_LONG_SYNC_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_SHORT_SYNC_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WAVE_JOIN_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_LM_LOAD_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_LM_STORE_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_LM_ATOMICS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_GM_LOAD_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_GM_STORE_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_GM_ATOMICS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_VS_STAGE_TEX_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_VS_STAGE_EFU_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_VS_STAGE_HALF_ALU_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FS_STAGE_TEX_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FS_STAGE_CFLOW_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FS_STAGE_EFU_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FS_STAGE_BARY_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_VS_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FS_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_ADDR_LOCK_COUNT, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_UCHE_READ_TRANS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_UCHE_WRITE_TRANS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_EXPORT_VPC_TRANS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_EXPORT_RB_TRANS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_PIXELS_KILLED, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_ICL1_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_ICL1_MISSES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_HS_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_DS_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_GS_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_CS_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_GPR_READ, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_GPR_WRITE, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FS_STAGE_HALF_EFU_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_VS_STAGE_HALF_EFU_INSTRUCTIONS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_LM_BANK_CONFLICTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_TEX_CONTROL_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_LOAD_CONTROL_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_FLOW_CONTROL_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_LM_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_DISPATCHER_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_SEQUENCER_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_LOW_EFFICIENCY_STARVED_BY_TP, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_STARVE_CYCLES_HLSQ, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_NON_EXECUTION_LS_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WORKING_EU, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_ANY_EU_WORKING, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WORKING_EU_FS_STAGE, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_ANY_EU_WORKING_FS_STAGE, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WORKING_EU_VS_STAGE, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_ANY_EU_WORKING_VS_STAGE, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_WORKING_EU_CS_STAGE, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_ANY_EU_WORKING_CS_STAGE, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_GPR_READ_PREFETCH, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_GPR_READ_CONFLICT, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_GPR_WRITE_CONFLICT, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_GM_LOAD_LATENCY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_GM_LOAD_LATENCY_SAMPLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_SP_EXECUTABLE_WAVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_ALU_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_EFU_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_STALL_CYCLES_VPC, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_STALL_CYCLES_TP, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_STALL_CYCLES_UCHE, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_STALL_CYCLES_RB, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_NON_EXECUTION_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_CONTEXTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_CONTEXT_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FS_STAGE_WAVE_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FS_STAGE_WAVE_SAMPLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_VS_STAGE_WAVE_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_VS_STAGE_WAVE_SAMPLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FS_STAGE_DURATION_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_VS_STAGE_DURATION_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_CTRL_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_LOAD_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_EMIT_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_NOP_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_WAIT_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_FETCH_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_IDLE_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_END_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_LONG_SYNC_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_SHORT_SYNC_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WAVE_JOIN_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_LM_LOAD_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_LM_STORE_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_LM_ATOMICS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_GM_LOAD_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_GM_STORE_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_GM_ATOMICS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_VS_STAGE_TEX_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_VS_STAGE_EFU_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_VS_STAGE_HALF_ALU_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FS_STAGE_TEX_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FS_STAGE_CFLOW_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FS_STAGE_EFU_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FS_STAGE_BARY_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_VS_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FS_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_ADDR_LOCK_COUNT, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_UCHE_READ_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_UCHE_WRITE_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_EXPORT_VPC_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_EXPORT_RB_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_PIXELS_KILLED, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_ICL1_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_ICL1_MISSES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_HS_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_DS_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_GS_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_CS_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_GPR_READ, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_GPR_WRITE, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FS_STAGE_HALF_EFU_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_VS_STAGE_HALF_EFU_INSTRUCTIONS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_LM_BANK_CONFLICTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_TEX_CONTROL_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_LOAD_CONTROL_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_FLOW_CONTROL_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_LM_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_DISPATCHER_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_SEQUENCER_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_LOW_EFFICIENCY_STARVED_BY_TP, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_STARVE_CYCLES_HLSQ, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_NON_EXECUTION_LS_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WORKING_EU, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_ANY_EU_WORKING, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WORKING_EU_FS_STAGE, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_ANY_EU_WORKING_FS_STAGE, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WORKING_EU_VS_STAGE, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_ANY_EU_WORKING_VS_STAGE, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_WORKING_EU_CS_STAGE, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_ANY_EU_WORKING_CS_STAGE, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_GPR_READ_PREFETCH, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_GPR_READ_CONFLICT, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_GPR_WRITE_CONFLICT, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_GM_LOAD_LATENCY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_GM_LOAD_LATENCY_SAMPLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_SP_EXECUTABLE_WAVES, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter tp_counters[] = {
-       COUNTER(TPL1_PERFCTR_TP_SEL(0),  RBBM_PERFCTR_TP(0),  RBBM_PERFCTR_TP(0)+1),
-       COUNTER(TPL1_PERFCTR_TP_SEL(1),  RBBM_PERFCTR_TP(1),  RBBM_PERFCTR_TP(1)+1),
-       COUNTER(TPL1_PERFCTR_TP_SEL(2),  RBBM_PERFCTR_TP(2),  RBBM_PERFCTR_TP(2)+1),
-       COUNTER(TPL1_PERFCTR_TP_SEL(3),  RBBM_PERFCTR_TP(3),  RBBM_PERFCTR_TP(3)+1),
-       COUNTER(TPL1_PERFCTR_TP_SEL(4),  RBBM_PERFCTR_TP(4),  RBBM_PERFCTR_TP(4)+1),
-       COUNTER(TPL1_PERFCTR_TP_SEL(5),  RBBM_PERFCTR_TP(5),  RBBM_PERFCTR_TP(5)+1),
-       COUNTER(TPL1_PERFCTR_TP_SEL(6),  RBBM_PERFCTR_TP(6),  RBBM_PERFCTR_TP(6)+1),
-       COUNTER(TPL1_PERFCTR_TP_SEL(7),  RBBM_PERFCTR_TP(7),  RBBM_PERFCTR_TP(7)+1),
-       COUNTER(TPL1_PERFCTR_TP_SEL(8),  RBBM_PERFCTR_TP(8),  RBBM_PERFCTR_TP(8)+1),
-       COUNTER(TPL1_PERFCTR_TP_SEL(9),  RBBM_PERFCTR_TP(9),  RBBM_PERFCTR_TP(9)+1),
-       COUNTER(TPL1_PERFCTR_TP_SEL(10), RBBM_PERFCTR_TP(10), RBBM_PERFCTR_TP(10)+1),
-       COUNTER(TPL1_PERFCTR_TP_SEL(11), RBBM_PERFCTR_TP(11), RBBM_PERFCTR_TP(11)+1),
+      COUNTER(TPL1_PERFCTR_TP_SEL(0),  RBBM_PERFCTR_TP(0),  RBBM_PERFCTR_TP(0)+1),
+      COUNTER(TPL1_PERFCTR_TP_SEL(1),  RBBM_PERFCTR_TP(1),  RBBM_PERFCTR_TP(1)+1),
+      COUNTER(TPL1_PERFCTR_TP_SEL(2),  RBBM_PERFCTR_TP(2),  RBBM_PERFCTR_TP(2)+1),
+      COUNTER(TPL1_PERFCTR_TP_SEL(3),  RBBM_PERFCTR_TP(3),  RBBM_PERFCTR_TP(3)+1),
+      COUNTER(TPL1_PERFCTR_TP_SEL(4),  RBBM_PERFCTR_TP(4),  RBBM_PERFCTR_TP(4)+1),
+      COUNTER(TPL1_PERFCTR_TP_SEL(5),  RBBM_PERFCTR_TP(5),  RBBM_PERFCTR_TP(5)+1),
+      COUNTER(TPL1_PERFCTR_TP_SEL(6),  RBBM_PERFCTR_TP(6),  RBBM_PERFCTR_TP(6)+1),
+      COUNTER(TPL1_PERFCTR_TP_SEL(7),  RBBM_PERFCTR_TP(7),  RBBM_PERFCTR_TP(7)+1),
+      COUNTER(TPL1_PERFCTR_TP_SEL(8),  RBBM_PERFCTR_TP(8),  RBBM_PERFCTR_TP(8)+1),
+      COUNTER(TPL1_PERFCTR_TP_SEL(9),  RBBM_PERFCTR_TP(9),  RBBM_PERFCTR_TP(9)+1),
+      COUNTER(TPL1_PERFCTR_TP_SEL(10), RBBM_PERFCTR_TP(10), RBBM_PERFCTR_TP(10)+1),
+      COUNTER(TPL1_PERFCTR_TP_SEL(11), RBBM_PERFCTR_TP(11), RBBM_PERFCTR_TP(11)+1),
 };
 
 static const struct fd_perfcntr_countable tp_countables[] = {
-       COUNTABLE(PERF_TP_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_STALL_CYCLES_UCHE, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_LATENCY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_LATENCY_TRANS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_FLAG_CACHE_REQUEST_SAMPLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_FLAG_CACHE_REQUEST_LATENCY, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_L1_CACHELINE_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_L1_CACHELINE_MISSES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_SP_TP_TRANS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_TP_SP_TRANS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_OUTPUT_PIXELS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_FILTER_WORKLOAD_16BIT, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_FILTER_WORKLOAD_32BIT, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_QUADS_RECEIVED, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_QUADS_OFFSET, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_QUADS_SHADOW, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_QUADS_ARRAY, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_QUADS_GRADIENT, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_QUADS_1D, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_QUADS_2D, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_QUADS_BUFFER, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_QUADS_3D, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_QUADS_CUBE, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_DIVERGENT_QUADS_RECEIVED, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_PRT_NON_RESIDENT_EVENTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_OUTPUT_PIXELS_POINT, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_OUTPUT_PIXELS_BILINEAR, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_OUTPUT_PIXELS_MIP, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_OUTPUT_PIXELS_ANISO, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_OUTPUT_PIXELS_ZERO_LOD, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_FLAG_CACHE_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_FLAG_CACHE_MISSES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_L1_5_L2_REQUESTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_2D_OUTPUT_PIXELS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_2D_OUTPUT_PIXELS_POINT, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_2D_OUTPUT_PIXELS_BILINEAR, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_2D_FILTER_WORKLOAD_16BIT, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_2D_FILTER_WORKLOAD_32BIT, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_TPA2TPC_TRANS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_L1_MISSES_ASTC_1TILE, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_L1_MISSES_ASTC_2TILE, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_L1_MISSES_ASTC_4TILE, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_L1_5_L2_COMPRESS_REQS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_L1_5_L2_COMPRESS_MISS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_L1_BANK_CONFLICT, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_L1_5_MISS_LATENCY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_L1_5_MISS_LATENCY_TRANS, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_QUADS_CONSTANT_MULTIPLIED, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_FRONTEND_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_L1_TAG_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_L1_DATA_WRITE_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_PRE_L1_DECOM_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_BACKEND_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_FLAG_CACHE_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_L1_5_CACHE_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_STARVE_CYCLES_SP, UINT64, AVERAGE),
-       COUNTABLE(PERF_TP_STARVE_CYCLES_UCHE, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_STALL_CYCLES_UCHE, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_LATENCY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_LATENCY_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_FLAG_CACHE_REQUEST_SAMPLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_FLAG_CACHE_REQUEST_LATENCY, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_L1_CACHELINE_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_L1_CACHELINE_MISSES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_SP_TP_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_TP_SP_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_OUTPUT_PIXELS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_FILTER_WORKLOAD_16BIT, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_FILTER_WORKLOAD_32BIT, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_QUADS_RECEIVED, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_QUADS_OFFSET, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_QUADS_SHADOW, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_QUADS_ARRAY, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_QUADS_GRADIENT, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_QUADS_1D, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_QUADS_2D, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_QUADS_BUFFER, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_QUADS_3D, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_QUADS_CUBE, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_DIVERGENT_QUADS_RECEIVED, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_PRT_NON_RESIDENT_EVENTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_OUTPUT_PIXELS_POINT, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_OUTPUT_PIXELS_BILINEAR, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_OUTPUT_PIXELS_MIP, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_OUTPUT_PIXELS_ANISO, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_OUTPUT_PIXELS_ZERO_LOD, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_FLAG_CACHE_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_FLAG_CACHE_MISSES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_L1_5_L2_REQUESTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_2D_OUTPUT_PIXELS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_2D_OUTPUT_PIXELS_POINT, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_2D_OUTPUT_PIXELS_BILINEAR, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_2D_FILTER_WORKLOAD_16BIT, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_2D_FILTER_WORKLOAD_32BIT, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_TPA2TPC_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_L1_MISSES_ASTC_1TILE, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_L1_MISSES_ASTC_2TILE, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_L1_MISSES_ASTC_4TILE, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_L1_5_L2_COMPRESS_REQS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_L1_5_L2_COMPRESS_MISS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_L1_BANK_CONFLICT, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_L1_5_MISS_LATENCY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_L1_5_MISS_LATENCY_TRANS, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_QUADS_CONSTANT_MULTIPLIED, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_FRONTEND_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_L1_TAG_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_L1_DATA_WRITE_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_PRE_L1_DECOM_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_BACKEND_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_FLAG_CACHE_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_L1_5_CACHE_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_STARVE_CYCLES_SP, UINT64, AVERAGE),
+      COUNTABLE(PERF_TP_STARVE_CYCLES_UCHE, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter uche_counters[] = {
-       COUNTER(UCHE_PERFCTR_UCHE_SEL(0),  RBBM_PERFCTR_UCHE(0),  RBBM_PERFCTR_UCHE(0)+1),
-       COUNTER(UCHE_PERFCTR_UCHE_SEL(1),  RBBM_PERFCTR_UCHE(1),  RBBM_PERFCTR_UCHE(1)+1),
-       COUNTER(UCHE_PERFCTR_UCHE_SEL(2),  RBBM_PERFCTR_UCHE(2),  RBBM_PERFCTR_UCHE(2)+1),
-       COUNTER(UCHE_PERFCTR_UCHE_SEL(3),  RBBM_PERFCTR_UCHE(3),  RBBM_PERFCTR_UCHE(3)+1),
-       COUNTER(UCHE_PERFCTR_UCHE_SEL(4),  RBBM_PERFCTR_UCHE(4),  RBBM_PERFCTR_UCHE(4)+1),
-       COUNTER(UCHE_PERFCTR_UCHE_SEL(5),  RBBM_PERFCTR_UCHE(5),  RBBM_PERFCTR_UCHE(5)+1),
-       COUNTER(UCHE_PERFCTR_UCHE_SEL(6),  RBBM_PERFCTR_UCHE(6),  RBBM_PERFCTR_UCHE(6)+1),
-       COUNTER(UCHE_PERFCTR_UCHE_SEL(7),  RBBM_PERFCTR_UCHE(7),  RBBM_PERFCTR_UCHE(7)+1),
-       COUNTER(UCHE_PERFCTR_UCHE_SEL(8),  RBBM_PERFCTR_UCHE(8),  RBBM_PERFCTR_UCHE(8)+1),
-       COUNTER(UCHE_PERFCTR_UCHE_SEL(9),  RBBM_PERFCTR_UCHE(9),  RBBM_PERFCTR_UCHE(9)+1),
-       COUNTER(UCHE_PERFCTR_UCHE_SEL(10), RBBM_PERFCTR_UCHE(10), RBBM_PERFCTR_UCHE(10)+1),
-       COUNTER(UCHE_PERFCTR_UCHE_SEL(11), RBBM_PERFCTR_UCHE(11), RBBM_PERFCTR_UCHE(11)+1),
+      COUNTER(UCHE_PERFCTR_UCHE_SEL(0),  RBBM_PERFCTR_UCHE(0),  RBBM_PERFCTR_UCHE(0)+1),
+      COUNTER(UCHE_PERFCTR_UCHE_SEL(1),  RBBM_PERFCTR_UCHE(1),  RBBM_PERFCTR_UCHE(1)+1),
+      COUNTER(UCHE_PERFCTR_UCHE_SEL(2),  RBBM_PERFCTR_UCHE(2),  RBBM_PERFCTR_UCHE(2)+1),
+      COUNTER(UCHE_PERFCTR_UCHE_SEL(3),  RBBM_PERFCTR_UCHE(3),  RBBM_PERFCTR_UCHE(3)+1),
+      COUNTER(UCHE_PERFCTR_UCHE_SEL(4),  RBBM_PERFCTR_UCHE(4),  RBBM_PERFCTR_UCHE(4)+1),
+      COUNTER(UCHE_PERFCTR_UCHE_SEL(5),  RBBM_PERFCTR_UCHE(5),  RBBM_PERFCTR_UCHE(5)+1),
+      COUNTER(UCHE_PERFCTR_UCHE_SEL(6),  RBBM_PERFCTR_UCHE(6),  RBBM_PERFCTR_UCHE(6)+1),
+      COUNTER(UCHE_PERFCTR_UCHE_SEL(7),  RBBM_PERFCTR_UCHE(7),  RBBM_PERFCTR_UCHE(7)+1),
+      COUNTER(UCHE_PERFCTR_UCHE_SEL(8),  RBBM_PERFCTR_UCHE(8),  RBBM_PERFCTR_UCHE(8)+1),
+      COUNTER(UCHE_PERFCTR_UCHE_SEL(9),  RBBM_PERFCTR_UCHE(9),  RBBM_PERFCTR_UCHE(9)+1),
+      COUNTER(UCHE_PERFCTR_UCHE_SEL(10), RBBM_PERFCTR_UCHE(10), RBBM_PERFCTR_UCHE(10)+1),
+      COUNTER(UCHE_PERFCTR_UCHE_SEL(11), RBBM_PERFCTR_UCHE(11), RBBM_PERFCTR_UCHE(11)+1),
 };
 
 static const struct fd_perfcntr_countable uche_countables[] = {
-       COUNTABLE(PERF_UCHE_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_STALL_CYCLES_ARBITER, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_VBIF_LATENCY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_VBIF_LATENCY_SAMPLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_TP, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_VFD, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_HLSQ, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_LRZ, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_SP, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_READ_REQUESTS_TP, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_READ_REQUESTS_VFD, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_READ_REQUESTS_HLSQ, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_READ_REQUESTS_LRZ, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_READ_REQUESTS_SP, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_WRITE_REQUESTS_LRZ, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_WRITE_REQUESTS_SP, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_WRITE_REQUESTS_VPC, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_WRITE_REQUESTS_VSC, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_EVICTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_BANK_REQ0, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_BANK_REQ1, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_BANK_REQ2, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_BANK_REQ3, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_BANK_REQ4, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_BANK_REQ5, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_BANK_REQ6, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_BANK_REQ7, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_CH0, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_CH1, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_GMEM_READ_BEATS, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_TPH_REF_FULL, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_TPH_VICTIM_FULL, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_TPH_EXT_FULL, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_VBIF_STALL_WRITE_DATA, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_DCMP_LATENCY_SAMPLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_DCMP_LATENCY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_PC, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_READ_REQUESTS_PC, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_RAM_READ_REQ, UINT64, AVERAGE),
-       COUNTABLE(PERF_UCHE_RAM_WRITE_REQ, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_STALL_CYCLES_ARBITER, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_VBIF_LATENCY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_VBIF_LATENCY_SAMPLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_TP, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_VFD, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_HLSQ, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_LRZ, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_SP, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_READ_REQUESTS_TP, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_READ_REQUESTS_VFD, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_READ_REQUESTS_HLSQ, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_READ_REQUESTS_LRZ, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_READ_REQUESTS_SP, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_WRITE_REQUESTS_LRZ, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_WRITE_REQUESTS_SP, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_WRITE_REQUESTS_VPC, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_WRITE_REQUESTS_VSC, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_EVICTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_BANK_REQ0, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_BANK_REQ1, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_BANK_REQ2, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_BANK_REQ3, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_BANK_REQ4, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_BANK_REQ5, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_BANK_REQ6, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_BANK_REQ7, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_CH0, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_CH1, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_GMEM_READ_BEATS, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_TPH_REF_FULL, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_TPH_VICTIM_FULL, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_TPH_EXT_FULL, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_VBIF_STALL_WRITE_DATA, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_DCMP_LATENCY_SAMPLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_DCMP_LATENCY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_PC, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_READ_REQUESTS_PC, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_RAM_READ_REQ, UINT64, AVERAGE),
+      COUNTABLE(PERF_UCHE_RAM_WRITE_REQ, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter vfd_counters[] = {
-       COUNTER(VFD_PERFCTR_VFD_SEL(0), RBBM_PERFCTR_VFD(0), RBBM_PERFCTR_VFD(0)+1),
-       COUNTER(VFD_PERFCTR_VFD_SEL(1), RBBM_PERFCTR_VFD(1), RBBM_PERFCTR_VFD(1)+1),
-       COUNTER(VFD_PERFCTR_VFD_SEL(2), RBBM_PERFCTR_VFD(2), RBBM_PERFCTR_VFD(2)+1),
-       COUNTER(VFD_PERFCTR_VFD_SEL(3), RBBM_PERFCTR_VFD(3), RBBM_PERFCTR_VFD(3)+1),
-       COUNTER(VFD_PERFCTR_VFD_SEL(4), RBBM_PERFCTR_VFD(4), RBBM_PERFCTR_VFD(4)+1),
-       COUNTER(VFD_PERFCTR_VFD_SEL(5), RBBM_PERFCTR_VFD(5), RBBM_PERFCTR_VFD(5)+1),
-       COUNTER(VFD_PERFCTR_VFD_SEL(6), RBBM_PERFCTR_VFD(6), RBBM_PERFCTR_VFD(6)+1),
-       COUNTER(VFD_PERFCTR_VFD_SEL(7), RBBM_PERFCTR_VFD(7), RBBM_PERFCTR_VFD(7)+1),
+      COUNTER(VFD_PERFCTR_VFD_SEL(0), RBBM_PERFCTR_VFD(0), RBBM_PERFCTR_VFD(0)+1),
+      COUNTER(VFD_PERFCTR_VFD_SEL(1), RBBM_PERFCTR_VFD(1), RBBM_PERFCTR_VFD(1)+1),
+      COUNTER(VFD_PERFCTR_VFD_SEL(2), RBBM_PERFCTR_VFD(2), RBBM_PERFCTR_VFD(2)+1),
+      COUNTER(VFD_PERFCTR_VFD_SEL(3), RBBM_PERFCTR_VFD(3), RBBM_PERFCTR_VFD(3)+1),
+      COUNTER(VFD_PERFCTR_VFD_SEL(4), RBBM_PERFCTR_VFD(4), RBBM_PERFCTR_VFD(4)+1),
+      COUNTER(VFD_PERFCTR_VFD_SEL(5), RBBM_PERFCTR_VFD(5), RBBM_PERFCTR_VFD(5)+1),
+      COUNTER(VFD_PERFCTR_VFD_SEL(6), RBBM_PERFCTR_VFD(6), RBBM_PERFCTR_VFD(6)+1),
+      COUNTER(VFD_PERFCTR_VFD_SEL(7), RBBM_PERFCTR_VFD(7), RBBM_PERFCTR_VFD(7)+1),
 };
 
 static const struct fd_perfcntr_countable vfd_countables[] = {
-       COUNTABLE(PERF_VFD_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_STALL_CYCLES_UCHE, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_STALL_CYCLES_VPC_ALLOC, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_STALL_CYCLES_SP_INFO, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_STALL_CYCLES_SP_ATTR, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_STARVE_CYCLES_UCHE, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_RBUFFER_FULL, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_ATTR_INFO_FIFO_FULL, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_DECODED_ATTRIBUTE_BYTES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_NUM_ATTRIBUTES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_UPPER_SHADER_FIBERS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_LOWER_SHADER_FIBERS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_MODE_0_FIBERS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_MODE_1_FIBERS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_MODE_2_FIBERS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_MODE_3_FIBERS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_MODE_4_FIBERS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFD_TOTAL_VERTICES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFDP_STALL_CYCLES_VFD, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFDP_STALL_CYCLES_VFD_INDEX, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFDP_STALL_CYCLES_VFD_PROG, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFDP_STARVE_CYCLES_PC, UINT64, AVERAGE),
-       COUNTABLE(PERF_VFDP_VS_STAGE_WAVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_STALL_CYCLES_UCHE, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_STALL_CYCLES_VPC_ALLOC, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_STALL_CYCLES_SP_INFO, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_STALL_CYCLES_SP_ATTR, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_STARVE_CYCLES_UCHE, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_RBUFFER_FULL, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_ATTR_INFO_FIFO_FULL, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_DECODED_ATTRIBUTE_BYTES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_NUM_ATTRIBUTES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_UPPER_SHADER_FIBERS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_LOWER_SHADER_FIBERS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_MODE_0_FIBERS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_MODE_1_FIBERS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_MODE_2_FIBERS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_MODE_3_FIBERS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_MODE_4_FIBERS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFD_TOTAL_VERTICES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFDP_STALL_CYCLES_VFD, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFDP_STALL_CYCLES_VFD_INDEX, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFDP_STALL_CYCLES_VFD_PROG, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFDP_STARVE_CYCLES_PC, UINT64, AVERAGE),
+      COUNTABLE(PERF_VFDP_VS_STAGE_WAVES, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter vpc_counters[] = {
-       COUNTER(VPC_PERFCTR_VPC_SEL(0), RBBM_PERFCTR_VPC(0), RBBM_PERFCTR_VPC(0)+1),
-       COUNTER(VPC_PERFCTR_VPC_SEL(1), RBBM_PERFCTR_VPC(1), RBBM_PERFCTR_VPC(1)+1),
-       COUNTER(VPC_PERFCTR_VPC_SEL(2), RBBM_PERFCTR_VPC(2), RBBM_PERFCTR_VPC(2)+1),
-       COUNTER(VPC_PERFCTR_VPC_SEL(3), RBBM_PERFCTR_VPC(3), RBBM_PERFCTR_VPC(3)+1),
-       COUNTER(VPC_PERFCTR_VPC_SEL(4), RBBM_PERFCTR_VPC(4), RBBM_PERFCTR_VPC(4)+1),
-       COUNTER(VPC_PERFCTR_VPC_SEL(5), RBBM_PERFCTR_VPC(5), RBBM_PERFCTR_VPC(5)+1),
+      COUNTER(VPC_PERFCTR_VPC_SEL(0), RBBM_PERFCTR_VPC(0), RBBM_PERFCTR_VPC(0)+1),
+      COUNTER(VPC_PERFCTR_VPC_SEL(1), RBBM_PERFCTR_VPC(1), RBBM_PERFCTR_VPC(1)+1),
+      COUNTER(VPC_PERFCTR_VPC_SEL(2), RBBM_PERFCTR_VPC(2), RBBM_PERFCTR_VPC(2)+1),
+      COUNTER(VPC_PERFCTR_VPC_SEL(3), RBBM_PERFCTR_VPC(3), RBBM_PERFCTR_VPC(3)+1),
+      COUNTER(VPC_PERFCTR_VPC_SEL(4), RBBM_PERFCTR_VPC(4), RBBM_PERFCTR_VPC(4)+1),
+      COUNTER(VPC_PERFCTR_VPC_SEL(5), RBBM_PERFCTR_VPC(5), RBBM_PERFCTR_VPC(5)+1),
 };
 
 static const struct fd_perfcntr_countable vpc_countables[] = {
-       COUNTABLE(PERF_VPC_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_STALL_CYCLES_UCHE, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_STALL_CYCLES_VFD_WACK, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_STALL_CYCLES_HLSQ_PRIM_ALLOC, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_STALL_CYCLES_PC, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_STALL_CYCLES_SP_LM, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_STARVE_CYCLES_SP, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_STARVE_CYCLES_LRZ, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_PC_PRIMITIVES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_SP_COMPONENTS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_STALL_CYCLES_VPCRAM_POS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_LRZ_ASSIGN_PRIMITIVES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_RB_VISIBLE_PRIMITIVES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_LM_TRANSACTION, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_STREAMOUT_TRANSACTION, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_VS_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_PS_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_VS_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_PS_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_STARVE_CYCLES_RB, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_NUM_VPCRAM_READ_POS, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_WIT_FULL_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_VPCRAM_FULL_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_LM_FULL_WAIT_FOR_INTP_END, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_NUM_VPCRAM_WRITE, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_NUM_VPCRAM_READ_SO, UINT64, AVERAGE),
-       COUNTABLE(PERF_VPC_NUM_ATTR_REQ_LM, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_STALL_CYCLES_UCHE, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_STALL_CYCLES_VFD_WACK, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_STALL_CYCLES_HLSQ_PRIM_ALLOC, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_STALL_CYCLES_PC, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_STALL_CYCLES_SP_LM, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_STARVE_CYCLES_SP, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_STARVE_CYCLES_LRZ, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_PC_PRIMITIVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_SP_COMPONENTS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_STALL_CYCLES_VPCRAM_POS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_LRZ_ASSIGN_PRIMITIVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_RB_VISIBLE_PRIMITIVES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_LM_TRANSACTION, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_STREAMOUT_TRANSACTION, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_VS_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_PS_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_VS_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_PS_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_STARVE_CYCLES_RB, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_NUM_VPCRAM_READ_POS, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_WIT_FULL_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_VPCRAM_FULL_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_LM_FULL_WAIT_FOR_INTP_END, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_NUM_VPCRAM_WRITE, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_NUM_VPCRAM_READ_SO, UINT64, AVERAGE),
+      COUNTABLE(PERF_VPC_NUM_ATTR_REQ_LM, UINT64, AVERAGE),
 };
 
 static const struct fd_perfcntr_counter vsc_counters[] = {
-       COUNTER(VSC_PERFCTR_VSC_SEL(0), RBBM_PERFCTR_VSC(0), RBBM_PERFCTR_VSC(0)+1),
-       COUNTER(VSC_PERFCTR_VSC_SEL(1), RBBM_PERFCTR_VSC(1), RBBM_PERFCTR_VSC(1)+1),
+      COUNTER(VSC_PERFCTR_VSC_SEL(0), RBBM_PERFCTR_VSC(0), RBBM_PERFCTR_VSC(0)+1),
+      COUNTER(VSC_PERFCTR_VSC_SEL(1), RBBM_PERFCTR_VSC(1), RBBM_PERFCTR_VSC(1)+1),
 };
 
 static const struct fd_perfcntr_countable vsc_countables[] = {
-       COUNTABLE(PERF_VSC_BUSY_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VSC_WORKING_CYCLES, UINT64, AVERAGE),
-       COUNTABLE(PERF_VSC_STALL_CYCLES_UCHE, UINT64, AVERAGE),
-       COUNTABLE(PERF_VSC_EOT_NUM, UINT64, AVERAGE),
-       COUNTABLE(PERF_VSC_INPUT_TILES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VSC_BUSY_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VSC_WORKING_CYCLES, UINT64, AVERAGE),
+      COUNTABLE(PERF_VSC_STALL_CYCLES_UCHE, UINT64, AVERAGE),
+      COUNTABLE(PERF_VSC_EOT_NUM, UINT64, AVERAGE),
+      COUNTABLE(PERF_VSC_INPUT_TILES, UINT64, AVERAGE),
 };
 
 const struct fd_perfcntr_group a6xx_perfcntr_groups[] = {
-       GROUP("CP", cp_counters, cp_countables),
-       GROUP("CCU", ccu_counters, ccu_countables),
-       GROUP("TSE", tse_counters, tse_countables),
-       GROUP("RAS", ras_counters, ras_countables),
-       GROUP("LRZ", lrz_counters, lrz_countables),
-       GROUP("HLSQ", hlsq_counters, hlsq_countables),
-       GROUP("PC", pc_counters, pc_countables),
-       GROUP("RB", rb_counters, rb_countables),
-//     GROUP("RBBM", rbbm_counters, rbbm_countables),
-       GROUP("SP", sp_counters, sp_countables),
-       GROUP("TP", tp_counters, tp_countables),
-       GROUP("UCHE", uche_counters, uche_countables),
-       GROUP("VFD", vfd_counters, vfd_countables),
-       GROUP("VPC", vpc_counters, vpc_countables),
-       GROUP("VSC", vsc_counters, vsc_countables),
-//     GROUP("VBIF", vbif_counters, vbif_countables),
+      GROUP("CP", cp_counters, cp_countables),
+      GROUP("CCU", ccu_counters, ccu_countables),
+      GROUP("TSE", tse_counters, tse_countables),
+      GROUP("RAS", ras_counters, ras_countables),
+      GROUP("LRZ", lrz_counters, lrz_countables),
+      GROUP("HLSQ", hlsq_counters, hlsq_countables),
+      GROUP("PC", pc_counters, pc_countables),
+      GROUP("RB", rb_counters, rb_countables),
+      //       GROUP("RBBM", rbbm_counters, rbbm_countables),
+      GROUP("SP", sp_counters, sp_countables),
+      GROUP("TP", tp_counters, tp_countables),
+      GROUP("UCHE", uche_counters, uche_countables),
+      GROUP("VFD", vfd_counters, vfd_countables),
+      GROUP("VPC", vpc_counters, vpc_countables),
+      GROUP("VSC", vsc_counters, vsc_countables),
+      //       GROUP("VBIF", vbif_counters, vbif_countables),
 };
 
 const unsigned a6xx_num_perfcntr_groups = ARRAY_SIZE(a6xx_perfcntr_groups);
index a847866..59b6f06 100644 (file)
  */
 
 #include <assert.h>
+#include <curses.h>
 #include <err.h>
+#include <inttypes.h>
+#include <libconfig.h>
 #include <locale.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <stdint.h>
 #include <time.h>
-#include <curses.h>
-#include <libconfig.h>
-#include <inttypes.h>
 #include <xf86drm.h>
 
 #include "drm/freedreno_drmif.h"
  */
 
 struct counter_group {
-       const struct fd_perfcntr_group *group;
-
-       struct {
-               const struct fd_perfcntr_counter *counter;
-               uint16_t select_val;
-               volatile uint32_t *val_hi;
-               volatile uint32_t *val_lo;
-       } counter[MAX_CNTR_PER_GROUP];
-
-       /* last sample time: */
-       uint32_t stime[MAX_CNTR_PER_GROUP];
-       /* for now just care about the low 32b value.. at least then we don't
-        * have to really care that we can't sample both hi and lo regs at the
-        * same time:
-        */
-       uint32_t last[MAX_CNTR_PER_GROUP];
-       /* current value, ie. by how many did the counter increase in last
-        * sampling period divided by the sampling period:
-        */
-       float current[MAX_CNTR_PER_GROUP];
-       /* name of currently selected counters (for UI): */
-       const char *label[MAX_CNTR_PER_GROUP];
+   const struct fd_perfcntr_group *group;
+
+   struct {
+      const struct fd_perfcntr_counter *counter;
+      uint16_t select_val;
+      volatile uint32_t *val_hi;
+      volatile uint32_t *val_lo;
+   } counter[MAX_CNTR_PER_GROUP];
+
+   /* last sample time: */
+   uint32_t stime[MAX_CNTR_PER_GROUP];
+   /* for now just care about the low 32b value.. at least then we don't
+    * have to really care that we can't sample both hi and lo regs at the
+    * same time:
+    */
+   uint32_t last[MAX_CNTR_PER_GROUP];
+   /* current value, ie. by how many did the counter increase in last
+    * sampling period divided by the sampling period:
+    */
+   float current[MAX_CNTR_PER_GROUP];
+   /* name of currently selected counters (for UI): */
+   const char *label[MAX_CNTR_PER_GROUP];
 };
 
 static struct {
-       void *io;
-       uint32_t chipid;
-       uint32_t min_freq;
-       uint32_t max_freq;
-       /* per-generation table of counters: */
-       unsigned ngroups;
-       struct counter_group *groups;
-       /* drm device (for writing select regs via ring): */
-       struct fd_device *dev;
-       struct fd_pipe *pipe;
-       struct fd_submit *submit;
-       struct fd_ringbuffer *ring;
+   void *io;
+   uint32_t chipid;
+   uint32_t min_freq;
+   uint32_t max_freq;
+   /* per-generation table of counters: */
+   unsigned ngroups;
+   struct counter_group *groups;
+   /* drm device (for writing select regs via ring): */
+   struct fd_device *dev;
+   struct fd_pipe *pipe;
+   struct fd_submit *submit;
+   struct fd_ringbuffer *ring;
 } dev;
 
 static void config_save(void);
@@ -100,67 +100,65 @@ static void restore_counter_groups(void);
 static uint32_t
 gettime_us(void)
 {
-       struct timespec ts;
-       clock_gettime(CLOCK_MONOTONIC, &ts);
-       return (ts.tv_sec * 1000000) + (ts.tv_nsec / 1000);
+   struct timespec ts;
+   clock_gettime(CLOCK_MONOTONIC, &ts);
+   return (ts.tv_sec * 1000000) + (ts.tv_nsec / 1000);
 }
 
 static uint32_t
 delta(uint32_t a, uint32_t b)
 {
-       /* deal with rollover: */
-       if (a > b)
-               return 0xffffffff - a + b;
-       else
-               return b - a;
+   /* deal with rollover: */
+   if (a > b)
+      return 0xffffffff - a + b;
+   else
+      return b - a;
 }
 
 static void
 find_device(void)
 {
-       int ret, fd;
+   int ret, fd;
 
-       fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER);
-       if (fd < 0)
-               err(1, "could not open drm device");
+   fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER);
+   if (fd < 0)
+      err(1, "could not open drm device");
 
-       dev.dev  = fd_device_new(fd);
-       dev.pipe = fd_pipe_new(dev.dev, FD_PIPE_3D);
+   dev.dev = fd_device_new(fd);
+   dev.pipe = fd_pipe_new(dev.dev, FD_PIPE_3D);
 
-       uint64_t val;
-       ret = fd_pipe_get_param(dev.pipe, FD_CHIP_ID, &val);
-       if (ret) {
-               err(1, "could not get gpu-id");
-       }
-       dev.chipid = val;
+   uint64_t val;
+   ret = fd_pipe_get_param(dev.pipe, FD_CHIP_ID, &val);
+   if (ret) {
+      err(1, "could not get gpu-id");
+   }
+   dev.chipid = val;
 
 #define CHIP_FMT "d%d%d.%d"
-#define CHIP_ARGS(chipid) \
-               ((chipid) >> 24) & 0xff, \
-               ((chipid) >> 16) & 0xff, \
-               ((chipid) >> 8) & 0xff, \
-               ((chipid) >> 0) & 0xff
-       printf("device: a%"CHIP_FMT"\n", CHIP_ARGS(dev.chipid));
-
-       /* try MAX_FREQ first as that will work regardless of old dt
-        * dt bindings vs upstream bindings:
-        */
-       ret = fd_pipe_get_param(dev.pipe, FD_MAX_FREQ, &val);
-       if (ret) {
-               printf("falling back to parsing DT bindings for freq\n");
-               if (!fd_dt_find_freqs(&dev.min_freq, &dev.max_freq))
-                       err(1, "could not find GPU freqs");
-       } else {
-               dev.min_freq = 0;
-               dev.max_freq = val;
-       }
-
-       printf("min_freq=%u, max_freq=%u\n", dev.min_freq, dev.max_freq);
-
-       dev.io = fd_dt_find_io();
-       if (!dev.io) {
-               err(1, "could not map device");
-       }
+#define CHIP_ARGS(chipid)                                                      \
+   ((chipid) >> 24) & 0xff, ((chipid) >> 16) & 0xff, ((chipid) >> 8) & 0xff,   \
+      ((chipid) >> 0) & 0xff
+   printf("device: a%" CHIP_FMT "\n", CHIP_ARGS(dev.chipid));
+
+   /* try MAX_FREQ first as that will work regardless of old dt
+    * bindings vs upstream bindings:
+    */
+   ret = fd_pipe_get_param(dev.pipe, FD_MAX_FREQ, &val);
+   if (ret) {
+      printf("falling back to parsing DT bindings for freq\n");
+      if (!fd_dt_find_freqs(&dev.min_freq, &dev.max_freq))
+         err(1, "could not find GPU freqs");
+   } else {
+      dev.min_freq = 0;
+      dev.max_freq = val;
+   }
+
+   printf("min_freq=%u, max_freq=%u\n", dev.min_freq, dev.max_freq);
+
+   dev.io = fd_dt_find_io();
+   if (!dev.io) {
+      err(1, "could not map device");
+   }
 }
 
 /*
@@ -170,115 +168,115 @@ find_device(void)
 static void
 flush_ring(void)
 {
-       int ret;
+   int ret;
 
-       if (!dev.submit)
-               return;
+   if (!dev.submit)
+      return;
 
-       ret = fd_submit_flush(dev.submit, -1, NULL, NULL);
-       if (ret)
-               errx(1, "submit failed: %d", ret);
-       fd_ringbuffer_del(dev.ring);
-       fd_submit_del(dev.submit);
+   ret = fd_submit_flush(dev.submit, -1, NULL, NULL);
+   if (ret)
+      errx(1, "submit failed: %d", ret);
+   fd_ringbuffer_del(dev.ring);
+   fd_submit_del(dev.submit);
 
-       dev.ring = NULL;
-       dev.submit = NULL;
+   dev.ring = NULL;
+   dev.submit = NULL;
 }
 
 static void
 select_counter(struct counter_group *group, int ctr, int n)
 {
-       assert(n < group->group->num_countables);
-       assert(ctr < group->group->num_counters);
-
-       group->label[ctr] = group->group->countables[n].name;
-       group->counter[ctr].select_val = n;
-
-       if (!dev.submit) {
-               dev.submit = fd_submit_new(dev.pipe);
-               dev.ring = fd_submit_new_ringbuffer(dev.submit, 0x1000,
-                               FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
-       }
-
-       /* bashing select register directly while gpu is active will end
-        * in tears.. so we need to write it via the ring:
-        *
-        * TODO it would help startup time, if gpu is loaded, to batch
-        * all the initial writes and do a single flush.. although that
-        * makes things more complicated for capturing inital sample value
-        */
-       struct fd_ringbuffer *ring = dev.ring;
-       switch (dev.chipid >> 24) {
-       case 2:
-       case 3:
-       case 4:
-               OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
-               OUT_RING(ring, 0x00000000);
-
-               if (group->group->counters[ctr].enable) {
-                       OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
-                       OUT_RING(ring, 0);
-               }
-
-               if (group->group->counters[ctr].clear) {
-                       OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
-                       OUT_RING(ring, 1);
-
-                       OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
-                       OUT_RING(ring, 0);
-               }
-
-               OUT_PKT0(ring, group->group->counters[ctr].select_reg, 1);
-               OUT_RING(ring, n);
-
-               if (group->group->counters[ctr].enable) {
-                       OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
-                       OUT_RING(ring, 1);
-               }
-
-               break;
-       case 5:
-       case 6:
-               OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
-
-               if (group->group->counters[ctr].enable) {
-                       OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
-                       OUT_RING(ring, 0);
-               }
-
-               if (group->group->counters[ctr].clear) {
-                       OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
-                       OUT_RING(ring, 1);
-
-                       OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
-                       OUT_RING(ring, 0);
-               }
-
-               OUT_PKT4(ring, group->group->counters[ctr].select_reg, 1);
-               OUT_RING(ring, n);
-
-               if (group->group->counters[ctr].enable) {
-                       OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
-                       OUT_RING(ring, 1);
-               }
-
-               break;
-       }
-
-       group->last[ctr] = *group->counter[ctr].val_lo;
-       group->stime[ctr] = gettime_us();
+   assert(n < group->group->num_countables);
+   assert(ctr < group->group->num_counters);
+
+   group->label[ctr] = group->group->countables[n].name;
+   group->counter[ctr].select_val = n;
+
+   if (!dev.submit) {
+      dev.submit = fd_submit_new(dev.pipe);
+      dev.ring = fd_submit_new_ringbuffer(
+         dev.submit, 0x1000, FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
+   }
+
+   /* bashing select register directly while gpu is active will end
+    * in tears.. so we need to write it via the ring:
+    *
+    * TODO it would help startup time, if gpu is loaded, to batch
+    * all the initial writes and do a single flush.. although that
+    * makes things more complicated for capturing initial sample value
+    */
+   struct fd_ringbuffer *ring = dev.ring;
+   switch (dev.chipid >> 24) {
+   case 2:
+   case 3:
+   case 4:
+      OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
+      OUT_RING(ring, 0x00000000);
+
+      if (group->group->counters[ctr].enable) {
+         OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
+         OUT_RING(ring, 0);
+      }
+
+      if (group->group->counters[ctr].clear) {
+         OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
+         OUT_RING(ring, 1);
+
+         OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
+         OUT_RING(ring, 0);
+      }
+
+      OUT_PKT0(ring, group->group->counters[ctr].select_reg, 1);
+      OUT_RING(ring, n);
+
+      if (group->group->counters[ctr].enable) {
+         OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
+         OUT_RING(ring, 1);
+      }
+
+      break;
+   case 5:
+   case 6:
+      OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
+
+      if (group->group->counters[ctr].enable) {
+         OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
+         OUT_RING(ring, 0);
+      }
+
+      if (group->group->counters[ctr].clear) {
+         OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
+         OUT_RING(ring, 1);
+
+         OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
+         OUT_RING(ring, 0);
+      }
+
+      OUT_PKT4(ring, group->group->counters[ctr].select_reg, 1);
+      OUT_RING(ring, n);
+
+      if (group->group->counters[ctr].enable) {
+         OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
+         OUT_RING(ring, 1);
+      }
+
+      break;
+   }
+
+   group->last[ctr] = *group->counter[ctr].val_lo;
+   group->stime[ctr] = gettime_us();
 }
 
 static void
 resample_counter(struct counter_group *group, int ctr)
 {
-       uint32_t val = *group->counter[ctr].val_lo;
-       uint32_t t = gettime_us();
-       uint32_t dt = delta(group->stime[ctr], t);
-       uint32_t dval = delta(group->last[ctr], val);
-       group->current[ctr] = (float)dval * 1000000.0 / (float)dt;
-       group->last[ctr] = val;
-       group->stime[ctr] = t;
+   uint32_t val = *group->counter[ctr].val_lo;
+   uint32_t t = gettime_us();
+   uint32_t dt = delta(group->stime[ctr], t);
+   uint32_t dval = delta(group->last[ctr], val);
+   group->current[ctr] = (float)dval * 1000000.0 / (float)dt;
+   group->last[ctr] = val;
+   group->stime[ctr] = t;
 }
 
 #define REFRESH_MS 500
@@ -287,20 +285,20 @@ resample_counter(struct counter_group *group, int ctr)
 static void
 resample(void)
 {
-       static uint64_t last_time;
-       uint64_t current_time = gettime_us();
+   static uint64_t last_time;
+   uint64_t current_time = gettime_us();
 
-       if ((current_time - last_time) < (REFRESH_MS * 1000 / 2))
-               return;
+   if ((current_time - last_time) < (REFRESH_MS * 1000 / 2))
+      return;
 
-       last_time = current_time;
+   last_time = current_time;
 
-       for (unsigned i = 0; i < dev.ngroups; i++) {
-               struct counter_group *group = &dev.groups[i];
-               for (unsigned j = 0; j < group->group->num_counters; j++) {
-                       resample_counter(group, j);
-               }
-       }
+   for (unsigned i = 0; i < dev.ngroups; i++) {
+      struct counter_group *group = &dev.groups[i];
+      for (unsigned j = 0; j < group->group->num_counters; j++) {
+         resample_counter(group, j);
+      }
+   }
 }
 
 /*
@@ -318,433 +316,433 @@ static int max_rows, current_cntr = 1;
 static void
 redraw_footer(WINDOW *win)
 {
-       char *footer;
-       int n;
+   char *footer;
+   int n;
 
-       n = asprintf(&footer, " fdperf: a%"CHIP_FMT" (%.2fMHz..%.2fMHz)",
-                       CHIP_ARGS(dev.chipid),
-                       ((float)dev.min_freq) / 1000000.0,
-                       ((float)dev.max_freq) / 1000000.0);
+   n = asprintf(&footer, " fdperf: a%" CHIP_FMT " (%.2fMHz..%.2fMHz)",
+                CHIP_ARGS(dev.chipid), ((float)dev.min_freq) / 1000000.0,
+                ((float)dev.max_freq) / 1000000.0);
 
-       wmove(win, h - 1, 0);
-       wattron(win, COLOR_PAIR(COLOR_FOOTER));
-       waddstr(win, footer);
-       whline(win, ' ', w - n);
-       wattroff(win, COLOR_PAIR(COLOR_FOOTER));
+   wmove(win, h - 1, 0);
+   wattron(win, COLOR_PAIR(COLOR_FOOTER));
+   waddstr(win, footer);
+   whline(win, ' ', w - n);
+   wattroff(win, COLOR_PAIR(COLOR_FOOTER));
 
-       free(footer);
+   free(footer);
 }
 
 static void
 redraw_group_header(WINDOW *win, int row, const char *name)
 {
-       wmove(win, row, 0);
-       wattron(win, A_BOLD);
-       wattron(win, COLOR_PAIR(COLOR_GROUP_HEADER));
-       waddstr(win, name);
-       whline(win, ' ', w - strlen(name));
-       wattroff(win, COLOR_PAIR(COLOR_GROUP_HEADER));
-       wattroff(win, A_BOLD);
+   wmove(win, row, 0);
+   wattron(win, A_BOLD);
+   wattron(win, COLOR_PAIR(COLOR_GROUP_HEADER));
+   waddstr(win, name);
+   whline(win, ' ', w - strlen(name));
+   wattroff(win, COLOR_PAIR(COLOR_GROUP_HEADER));
+   wattroff(win, A_BOLD);
 }
 
 static void
 redraw_counter_label(WINDOW *win, int row, const char *name, bool selected)
 {
-       int n = strlen(name);
-       assert(n <= ctr_width);
-       wmove(win, row, 0);
-       whline(win, ' ', ctr_width - n);
-       wmove(win, row, ctr_width - n);
-       if (selected)
-               wattron(win, COLOR_PAIR(COLOR_INVERSE));
-       waddstr(win, name);
-       if (selected)
-               wattroff(win, COLOR_PAIR(COLOR_INVERSE));
-       waddstr(win, ": ");
+   int n = strlen(name);
+   assert(n <= ctr_width);
+   wmove(win, row, 0);
+   whline(win, ' ', ctr_width - n);
+   wmove(win, row, ctr_width - n);
+   if (selected)
+      wattron(win, COLOR_PAIR(COLOR_INVERSE));
+   waddstr(win, name);
+   if (selected)
+      wattroff(win, COLOR_PAIR(COLOR_INVERSE));
+   waddstr(win, ": ");
 }
 
 static void
 redraw_counter_value_cycles(WINDOW *win, float val)
 {
-       char *str;
-       int x = getcurx(win);
-       int valwidth = w - x;
-       int barwidth, n;
-
-       /* convert to fraction of max freq: */
-       val = val / (float)dev.max_freq;
-
-       /* figure out percentage-bar width: */
-       barwidth = (int)(val * valwidth);
-
-       /* sometimes things go over 100%.. idk why, could be
-        * things running faster than base clock, or counter
-        * summing up cycles in multiple cores?
-        */
-       barwidth = MIN2(barwidth, valwidth - 1);
-
-       n = asprintf(&str, "%.2f%%", 100.0 * val);
-       wattron(win, COLOR_PAIR(COLOR_INVERSE));
-       waddnstr(win, str, barwidth);
-       if (barwidth > n) {
-               whline(win, ' ', barwidth - n);
-               wmove(win, getcury(win), x + barwidth);
-       }
-       wattroff(win, COLOR_PAIR(COLOR_INVERSE));
-       if (barwidth < n)
-               waddstr(win, str + barwidth);
-       whline(win, ' ', w - getcurx(win));
-
-       free(str);
+   char *str;
+   int x = getcurx(win);
+   int valwidth = w - x;
+   int barwidth, n;
+
+   /* convert to fraction of max freq: */
+   val = val / (float)dev.max_freq;
+
+   /* figure out percentage-bar width: */
+   barwidth = (int)(val * valwidth);
+
+   /* sometimes things go over 100%.. idk why, could be
+    * things running faster than base clock, or counter
+    * summing up cycles in multiple cores?
+    */
+   barwidth = MIN2(barwidth, valwidth - 1);
+
+   n = asprintf(&str, "%.2f%%", 100.0 * val);
+   wattron(win, COLOR_PAIR(COLOR_INVERSE));
+   waddnstr(win, str, barwidth);
+   if (barwidth > n) {
+      whline(win, ' ', barwidth - n);
+      wmove(win, getcury(win), x + barwidth);
+   }
+   wattroff(win, COLOR_PAIR(COLOR_INVERSE));
+   if (barwidth < n)
+      waddstr(win, str + barwidth);
+   whline(win, ' ', w - getcurx(win));
+
+   free(str);
 }
 
 static void
 redraw_counter_value_raw(WINDOW *win, float val)
 {
-       char *str;
-       (void) asprintf(&str, "%'.2f", val);
-       waddstr(win, str);
-       whline(win, ' ', w - getcurx(win));
-       free(str);
+   char *str;
+   (void)asprintf(&str, "%'.2f", val);
+   waddstr(win, str);
+   whline(win, ' ', w - getcurx(win));
+   free(str);
 }
 
 static void
-redraw_counter(WINDOW *win, int row, struct counter_group *group,
-               int ctr, bool selected)
+redraw_counter(WINDOW *win, int row, struct counter_group *group, int ctr,
+               bool selected)
 {
-       redraw_counter_label(win, row, group->label[ctr], selected);
-
-       /* quick hack, if the label has "CYCLE" in the name, it is
-        * probably a cycle counter ;-)
-        * Perhaps add more info in rnndb schema to know how to
-        * treat individual counters (ie. which are cycles, and
-        * for those we want to present as a percentage do we
-        * need to scale the result.. ie. is it running at some
-        * multiple or divisor of core clk, etc)
-        *
-        * TODO it would be much more clever to get this from xml
-        * Also.. in some cases I think we want to know how many
-        * units the counter is counting for, ie. if a320 has 2x
-        * shader as a306 we might need to scale the result..
-        */
-       if (strstr(group->label[ctr], "CYCLE") ||
-                       strstr(group->label[ctr], "BUSY") ||
-                       strstr(group->label[ctr], "IDLE"))
-               redraw_counter_value_cycles(win, group->current[ctr]);
-       else
-               redraw_counter_value_raw(win, group->current[ctr]);
+   redraw_counter_label(win, row, group->label[ctr], selected);
+
+   /* quick hack, if the label has "CYCLE" in the name, it is
+    * probably a cycle counter ;-)
+    * Perhaps add more info in rnndb schema to know how to
+    * treat individual counters (ie. which are cycles, and
+    * for those we want to present as a percentage do we
+    * need to scale the result.. ie. is it running at some
+    * multiple or divisor of core clk, etc)
+    *
+    * TODO it would be much more clever to get this from xml
+    * Also.. in some cases I think we want to know how many
+    * units the counter is counting for, ie. if a320 has 2x
+    * shader as a306 we might need to scale the result..
+    */
+   if (strstr(group->label[ctr], "CYCLE") ||
+       strstr(group->label[ctr], "BUSY") || strstr(group->label[ctr], "IDLE"))
+      redraw_counter_value_cycles(win, group->current[ctr]);
+   else
+      redraw_counter_value_raw(win, group->current[ctr]);
 }
 
 static void
 redraw(WINDOW *win)
 {
-       static int scroll = 0;
-       int max, row = 0;
+   static int scroll = 0;
+   int max, row = 0;
 
-       w = getmaxx(win);
-       h = getmaxy(win);
+   w = getmaxx(win);
+   h = getmaxy(win);
 
-       max = h - 3;
+   max = h - 3;
 
-       if ((current_cntr - scroll) > (max - 1)) {
-               scroll = current_cntr - (max - 1);
-       } else if ((current_cntr - 1) < scroll) {
-               scroll = current_cntr - 1;
-       }
+   if ((current_cntr - scroll) > (max - 1)) {
+      scroll = current_cntr - (max - 1);
+   } else if ((current_cntr - 1) < scroll) {
+      scroll = current_cntr - 1;
+   }
 
-       for (unsigned i = 0; i < dev.ngroups; i++) {
-               struct counter_group *group = &dev.groups[i];
-               unsigned j = 0;
+   for (unsigned i = 0; i < dev.ngroups; i++) {
+      struct counter_group *group = &dev.groups[i];
+      unsigned j = 0;
 
-               /* NOTE skip CP the first CP counter */
-               if (i == 0)
-                       j++;
+      /* NOTE skip the first CP counter (CP_ALWAYS_COUNT) */
+      if (i == 0)
+         j++;
 
-               if (j < group->group->num_counters) {
-                       if ((scroll <= row) && ((row - scroll) < max))
-                               redraw_group_header(win, row - scroll, group->group->name);
-                       row++;
-               }
+      if (j < group->group->num_counters) {
+         if ((scroll <= row) && ((row - scroll) < max))
+            redraw_group_header(win, row - scroll, group->group->name);
+         row++;
+      }
 
-               for (; j < group->group->num_counters; j++) {
-                       if ((scroll <= row) && ((row - scroll) < max))
-                               redraw_counter(win, row - scroll, group, j, row == current_cntr);
-                       row++;
-               }
-       }
+      for (; j < group->group->num_counters; j++) {
+         if ((scroll <= row) && ((row - scroll) < max))
+            redraw_counter(win, row - scroll, group, j, row == current_cntr);
+         row++;
+      }
+   }
 
-       /* convert back to physical (unscrolled) offset: */
-       row = max;
+   /* convert back to physical (unscrolled) offset: */
+   row = max;
 
-       redraw_group_header(win, row, "Status");
-       row++;
+   redraw_group_header(win, row, "Status");
+   row++;
 
-       /* Draw GPU freq row: */
-       redraw_counter_label(win, row, "Freq (MHz)", false);
-       redraw_counter_value_raw(win, dev.groups[0].current[0] / 1000000.0);
-       row++;
+   /* Draw GPU freq row: */
+   redraw_counter_label(win, row, "Freq (MHz)", false);
+   redraw_counter_value_raw(win, dev.groups[0].current[0] / 1000000.0);
+   row++;
 
-       redraw_footer(win);
+   redraw_footer(win);
 
-       refresh();
+   refresh();
 }
 
 static struct counter_group *
 current_counter(int *ctr)
 {
-       int n = 0;
-
-       for (unsigned i = 0; i < dev.ngroups; i++) {
-               struct counter_group *group = &dev.groups[i];
-               unsigned j = 0;
-
-               /* NOTE skip the first CP counter (CP_ALWAYS_COUNT) */
-               if (i == 0)
-                       j++;
-
-               /* account for group header: */
-               if (j < group->group->num_counters) {
-                       /* cannot select group header.. return null to indicate this
-                        * main_ui():
-                        */
-                       if (n == current_cntr)
-                               return NULL;
-                       n++;
-               }
-
-
-               for (; j < group->group->num_counters; j++) {
-                       if (n == current_cntr) {
-                               if (ctr)
-                                       *ctr = j;
-                               return group;
-                       }
-                       n++;
-               }
-       }
-
-       assert(0);
-       return NULL;
+   int n = 0;
+
+   for (unsigned i = 0; i < dev.ngroups; i++) {
+      struct counter_group *group = &dev.groups[i];
+      unsigned j = 0;
+
+      /* NOTE skip the first CP counter (CP_ALWAYS_COUNT) */
+      if (i == 0)
+         j++;
+
+      /* account for group header: */
+      if (j < group->group->num_counters) {
+         /* cannot select group header.. return NULL to indicate this
+          * to the caller (scroll_cntr() then steps past the header):
+          */
+         if (n == current_cntr)
+            return NULL;
+         n++;
+      }
+
+      for (; j < group->group->num_counters; j++) {
+         if (n == current_cntr) {
+            if (ctr)
+               *ctr = j;
+            return group;
+         }
+         n++;
+      }
+   }
+
+   assert(0);
+   return NULL;
 }
 
 static void
 counter_dialog(void)
 {
-       WINDOW *dialog;
-       struct counter_group *group;
-       int cnt = 0, current = 0, scroll;
-
-       /* figure out dialog size: */
-       int dh = h/2;
-       int dw = ctr_width + 2;
-
-       group = current_counter(&cnt);
-
-       /* find currently selected idx (note there can be discontinuities
-        * so the selected value does not map 1:1 to current idx)
-        */
-       uint32_t selected = group->counter[cnt].select_val;
-       for (int i = 0; i < group->group->num_countables; i++) {
-               if (group->group->countables[i].selector == selected) {
-                       current = i;
-                       break;
-               }
-       }
-
-       /* scrolling offset, if dialog is too small for all the choices: */
-       scroll = 0;
-
-       dialog = newwin(dh, dw, (h-dh)/2, (w-dw)/2);
-       box(dialog, 0, 0);
-       wrefresh(dialog);
-       keypad(dialog, TRUE);
-
-       while (true) {
-               int max = MIN2(dh - 2, group->group->num_countables);
-               int selector = -1;
-
-               if ((current - scroll) >= (dh - 3)) {
-                       scroll = current - (dh - 3);
-               } else if (current < scroll) {
-                       scroll = current;
-               }
-
-               for (int i = 0; i < max; i++) {
-                       int n = scroll + i;
-                       wmove(dialog, i+1, 1);
-                       if (n == current) {
-                               assert (n < group->group->num_countables);
-                               selector = group->group->countables[n].selector;
-                               wattron(dialog, COLOR_PAIR(COLOR_INVERSE));
-                       }
-                       if (n < group->group->num_countables)
-                               waddstr(dialog, group->group->countables[n].name);
-                       whline(dialog, ' ', dw - getcurx(dialog) - 1);
-                       if (n == current)
-                               wattroff(dialog, COLOR_PAIR(COLOR_INVERSE));
-               }
-
-               assert (selector >= 0);
-
-               switch (wgetch(dialog)) {
-               case KEY_UP:
-                       current = MAX2(0, current - 1);
-                       break;
-               case KEY_DOWN:
-                       current = MIN2(group->group->num_countables - 1, current + 1);
-                       break;
-               case KEY_LEFT:
-               case KEY_ENTER:
-                       /* select new sampler */
-                       select_counter(group, cnt, selector);
-                       flush_ring();
-                       config_save();
-                       goto out;
-               case 'q':
-                       goto out;
-               default:
-                       /* ignore */
-                       break;
-               }
-
-               resample();
-       }
+   WINDOW *dialog;
+   struct counter_group *group;
+   int cnt = 0, current = 0, scroll;
+
+   /* figure out dialog size: */
+   int dh = h / 2;
+   int dw = ctr_width + 2;
+
+   group = current_counter(&cnt);
+
+   /* find currently selected idx (note there can be discontinuities
+    * so the selected value does not map 1:1 to current idx)
+    */
+   uint32_t selected = group->counter[cnt].select_val;
+   for (int i = 0; i < group->group->num_countables; i++) {
+      if (group->group->countables[i].selector == selected) {
+         current = i;
+         break;
+      }
+   }
+
+   /* scrolling offset, if dialog is too small for all the choices: */
+   scroll = 0;
+
+   dialog = newwin(dh, dw, (h - dh) / 2, (w - dw) / 2);
+   box(dialog, 0, 0);
+   wrefresh(dialog);
+   keypad(dialog, TRUE);
+
+   while (true) {
+      int max = MIN2(dh - 2, group->group->num_countables);
+      int selector = -1;
+
+      if ((current - scroll) >= (dh - 3)) {
+         scroll = current - (dh - 3);
+      } else if (current < scroll) {
+         scroll = current;
+      }
+
+      for (int i = 0; i < max; i++) {
+         int n = scroll + i;
+         wmove(dialog, i + 1, 1);
+         if (n == current) {
+            assert(n < group->group->num_countables);
+            selector = group->group->countables[n].selector;
+            wattron(dialog, COLOR_PAIR(COLOR_INVERSE));
+         }
+         if (n < group->group->num_countables)
+            waddstr(dialog, group->group->countables[n].name);
+         whline(dialog, ' ', dw - getcurx(dialog) - 1);
+         if (n == current)
+            wattroff(dialog, COLOR_PAIR(COLOR_INVERSE));
+      }
+
+      assert(selector >= 0);
+
+      switch (wgetch(dialog)) {
+      case KEY_UP:
+         current = MAX2(0, current - 1);
+         break;
+      case KEY_DOWN:
+         current = MIN2(group->group->num_countables - 1, current + 1);
+         break;
+      case KEY_LEFT:
+      case KEY_ENTER:
+         /* select new sampler */
+         select_counter(group, cnt, selector);
+         flush_ring();
+         config_save();
+         goto out;
+      case 'q':
+         goto out;
+      default:
+         /* ignore */
+         break;
+      }
+
+      resample();
+   }
 
 out:
-       wborder(dialog, ' ', ' ', ' ',' ',' ',' ',' ',' ');
-       delwin(dialog);
+   wborder(dialog, ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ');
+   delwin(dialog);
 }
 
 static void
 scroll_cntr(int amount)
 {
-       if (amount < 0) {
-               current_cntr = MAX2(1, current_cntr + amount);
-               if (current_counter(NULL) == NULL) {
-                       current_cntr = MAX2(1, current_cntr - 1);
-               }
-       } else {
-               current_cntr = MIN2(max_rows - 1, current_cntr + amount);
-               if (current_counter(NULL) == NULL)
-                       current_cntr = MIN2(max_rows - 1, current_cntr + 1);
-       }
+   if (amount < 0) {
+      current_cntr = MAX2(1, current_cntr + amount);
+      if (current_counter(NULL) == NULL) {
+         current_cntr = MAX2(1, current_cntr - 1);
+      }
+   } else {
+      current_cntr = MIN2(max_rows - 1, current_cntr + amount);
+      if (current_counter(NULL) == NULL)
+         current_cntr = MIN2(max_rows - 1, current_cntr + 1);
+   }
 }
 
 static void
 main_ui(void)
 {
-       WINDOW *mainwin;
-       uint32_t last_time = gettime_us();
-
-       /* curses setup: */
-       mainwin = initscr();
-       if (!mainwin)
-               goto out;
-
-       cbreak();
-       wtimeout(mainwin, REFRESH_MS);
-       noecho();
-       keypad(mainwin, TRUE);
-       curs_set(0);
-       start_color();
-       init_pair(COLOR_GROUP_HEADER, COLOR_WHITE, COLOR_GREEN);
-       init_pair(COLOR_FOOTER,       COLOR_WHITE, COLOR_BLUE);
-       init_pair(COLOR_INVERSE,      COLOR_BLACK, COLOR_WHITE);
-
-       while (true) {
-               switch (wgetch(mainwin)) {
-               case KEY_UP:
-                       scroll_cntr(-1);
-                       break;
-               case KEY_DOWN:
-                       scroll_cntr(+1);
-                       break;
-               case KEY_NPAGE:  /* page-down */
-                       /* TODO figure out # of rows visible? */
-                       scroll_cntr(+15);
-                       break;
-               case KEY_PPAGE:  /* page-up */
-                       /* TODO figure out # of rows visible? */
-                       scroll_cntr(-15);
-                       break;
-               case KEY_RIGHT:
-                       counter_dialog();
-                       break;
-               case 'q':
-                       goto out;
-                       break;
-               default:
-                       /* ignore */
-                       break;
-               }
-               resample();
-               redraw(mainwin);
-
-               /* restore the counters every 0.5s in case the GPU has suspended,
-                * in which case the current selected countables will have reset:
-                */
-               uint32_t t = gettime_us();
-               if (delta(last_time, t) > 500000) {
-                       restore_counter_groups();
-                       flush_ring();
-                       last_time = t;
-               }
-       }
-
-       /* restore settings.. maybe we need an atexit()??*/
+   WINDOW *mainwin;
+   uint32_t last_time = gettime_us();
+
+   /* curses setup: */
+   mainwin = initscr();
+   if (!mainwin)
+      goto out;
+
+   cbreak();
+   wtimeout(mainwin, REFRESH_MS);
+   noecho();
+   keypad(mainwin, TRUE);
+   curs_set(0);
+   start_color();
+   init_pair(COLOR_GROUP_HEADER, COLOR_WHITE, COLOR_GREEN);
+   init_pair(COLOR_FOOTER, COLOR_WHITE, COLOR_BLUE);
+   init_pair(COLOR_INVERSE, COLOR_BLACK, COLOR_WHITE);
+
+   while (true) {
+      switch (wgetch(mainwin)) {
+      case KEY_UP:
+         scroll_cntr(-1);
+         break;
+      case KEY_DOWN:
+         scroll_cntr(+1);
+         break;
+      case KEY_NPAGE: /* page-down */
+         /* TODO figure out # of rows visible? */
+         scroll_cntr(+15);
+         break;
+      case KEY_PPAGE: /* page-up */
+         /* TODO figure out # of rows visible? */
+         scroll_cntr(-15);
+         break;
+      case KEY_RIGHT:
+         counter_dialog();
+         break;
+      case 'q':
+         goto out;
+         break;
+      default:
+         /* ignore */
+         break;
+      }
+      resample();
+      redraw(mainwin);
+
+      /* restore the counters every 0.5s in case the GPU has suspended,
+       * in which case the currently selected countables will have reset:
+       */
+      uint32_t t = gettime_us();
+      if (delta(last_time, t) > 500000) {
+         restore_counter_groups();
+         flush_ring();
+         last_time = t;
+      }
+   }
+
+   /* restore settings.. maybe we need an atexit()??*/
 out:
-       delwin(mainwin);
-       endwin();
-       refresh();
+   delwin(mainwin);
+   endwin();
+   refresh();
 }
 
 static void
 restore_counter_groups(void)
 {
-       for (unsigned i = 0; i < dev.ngroups; i++) {
-               struct counter_group *group = &dev.groups[i];
-               unsigned j = 0;
-
-               /* NOTE skip CP the first CP counter */
-               if (i == 0)
-                       j++;
-
-               for (; j < group->group->num_counters; j++) {
-                       select_counter(group, j, group->counter[j].select_val);
-               }
-       }
+   for (unsigned i = 0; i < dev.ngroups; i++) {
+      struct counter_group *group = &dev.groups[i];
+      unsigned j = 0;
+
+      /* NOTE skip the first CP counter (CP_ALWAYS_COUNT) */
+      if (i == 0)
+         j++;
+
+      for (; j < group->group->num_counters; j++) {
+         select_counter(group, j, group->counter[j].select_val);
+      }
+   }
 }
 
 static void
 setup_counter_groups(const struct fd_perfcntr_group *groups)
 {
-       for (unsigned i = 0; i < dev.ngroups; i++) {
-               struct counter_group *group = &dev.groups[i];
+   for (unsigned i = 0; i < dev.ngroups; i++) {
+      struct counter_group *group = &dev.groups[i];
 
-               group->group = &groups[i];
+      group->group = &groups[i];
 
-               max_rows += group->group->num_counters + 1;
+      max_rows += group->group->num_counters + 1;
 
-               /* the first CP counter is hidden: */
-               if (i == 0) {
-                       max_rows--;
-                       if (group->group->num_counters <= 1)
-                               max_rows--;
-               }
+      /* the first CP counter is hidden: */
+      if (i == 0) {
+         max_rows--;
+         if (group->group->num_counters <= 1)
+            max_rows--;
+      }
 
-               for (unsigned j = 0; j < group->group->num_counters; j++) {
-                       group->counter[j].counter = &group->group->counters[j];
+      for (unsigned j = 0; j < group->group->num_counters; j++) {
+         group->counter[j].counter = &group->group->counters[j];
 
-                       group->counter[j].val_hi = dev.io + (group->counter[j].counter->counter_reg_hi * 4);
-                       group->counter[j].val_lo = dev.io + (group->counter[j].counter->counter_reg_lo * 4);
+         group->counter[j].val_hi =
+            dev.io + (group->counter[j].counter->counter_reg_hi * 4);
+         group->counter[j].val_lo =
+            dev.io + (group->counter[j].counter->counter_reg_lo * 4);
 
-                       group->counter[j].select_val = j;
-               }
+         group->counter[j].select_val = j;
+      }
 
-               for (unsigned j = 0; j < group->group->num_countables; j++) {
-                       ctr_width = MAX2(ctr_width, strlen(group->group->countables[j].name) + 1);
-               }
-       }
+      for (unsigned j = 0; j < group->group->num_countables; j++) {
+         ctr_width =
+            MAX2(ctr_width, strlen(group->group->countables[j].name) + 1);
+      }
+   }
 }
 
 /*
@@ -757,77 +755,76 @@ static config_setting_t *setting;
 static void
 config_save(void)
 {
-       for (unsigned i = 0; i < dev.ngroups; i++) {
-               struct counter_group *group = &dev.groups[i];
-               unsigned j = 0;
-
-               /* NOTE skip CP the first CP counter */
-               if (i == 0)
-                       j++;
-
-               config_setting_t *sect =
-                       config_setting_get_member(setting, group->group->name);
-
-               for (; j < group->group->num_counters; j++) {
-                       char name[] = "counter0000";
-                       sprintf(name, "counter%d", j);
-                       config_setting_t *s =
-                               config_setting_lookup(sect, name);
-                       config_setting_set_int(s, group->counter[j].select_val);
-               }
-       }
-
-       config_write_file(&cfg, "fdperf.cfg");
+   for (unsigned i = 0; i < dev.ngroups; i++) {
+      struct counter_group *group = &dev.groups[i];
+      unsigned j = 0;
+
+      /* NOTE skip the first CP counter (CP_ALWAYS_COUNT) */
+      if (i == 0)
+         j++;
+
+      config_setting_t *sect =
+         config_setting_get_member(setting, group->group->name);
+
+      for (; j < group->group->num_counters; j++) {
+         char name[] = "counter0000";
+         sprintf(name, "counter%d", j);
+         config_setting_t *s = config_setting_lookup(sect, name);
+         config_setting_set_int(s, group->counter[j].select_val);
+      }
+   }
+
+   config_write_file(&cfg, "fdperf.cfg");
 }
 
 static void
 config_restore(void)
 {
-       char *str;
-
-       config_init(&cfg);
-
-       /* Read the file. If there is an error, report it and exit. */
-       if(!config_read_file(&cfg, "fdperf.cfg")) {
-               warn("could not restore settings");
-       }
-
-       config_setting_t *root = config_root_setting(&cfg);
-
-       /* per device settings: */
-       (void) asprintf(&str, "a%dxx", dev.chipid >> 24);
-       setting = config_setting_get_member(root, str);
-       if (!setting)
-               setting = config_setting_add(root, str, CONFIG_TYPE_GROUP);
-       free(str);
-
-       for (unsigned i = 0; i < dev.ngroups; i++) {
-               struct counter_group *group = &dev.groups[i];
-               unsigned j = 0;
-
-               /* NOTE skip CP the first CP counter */
-               if (i == 0)
-                       j++;
-
-               config_setting_t *sect =
-                       config_setting_get_member(setting, group->group->name);
-
-               if (!sect) {
-                       sect = config_setting_add(setting, group->group->name,
-                                       CONFIG_TYPE_GROUP);
-               }
-
-               for (; j < group->group->num_counters; j++) {
-                       char name[] = "counter0000";
-                       sprintf(name, "counter%d", j);
-                       config_setting_t *s = config_setting_lookup(sect, name);
-                       if (!s) {
-                               config_setting_add(sect, name, CONFIG_TYPE_INT);
-                               continue;
-                       }
-                       select_counter(group, j, config_setting_get_int(s));
-               }
-       }
+   char *str;
+
+   config_init(&cfg);
+
+   /* Read the file. If there is an error, warn and continue. */
+   if (!config_read_file(&cfg, "fdperf.cfg")) {
+      warn("could not restore settings");
+   }
+
+   config_setting_t *root = config_root_setting(&cfg);
+
+   /* per device settings: */
+   (void)asprintf(&str, "a%dxx", dev.chipid >> 24);
+   setting = config_setting_get_member(root, str);
+   if (!setting)
+      setting = config_setting_add(root, str, CONFIG_TYPE_GROUP);
+   free(str);
+
+   for (unsigned i = 0; i < dev.ngroups; i++) {
+      struct counter_group *group = &dev.groups[i];
+      unsigned j = 0;
+
+      /* NOTE skip the first CP counter (CP_ALWAYS_COUNT) */
+      if (i == 0)
+         j++;
+
+      config_setting_t *sect =
+         config_setting_get_member(setting, group->group->name);
+
+      if (!sect) {
+         sect =
+            config_setting_add(setting, group->group->name, CONFIG_TYPE_GROUP);
+      }
+
+      for (; j < group->group->num_counters; j++) {
+         char name[] = "counter0000";
+         sprintf(name, "counter%d", j);
+         config_setting_t *s = config_setting_lookup(sect, name);
+         if (!s) {
+            config_setting_add(sect, name, CONFIG_TYPE_INT);
+            continue;
+         }
+         select_counter(group, j, config_setting_get_int(s));
+      }
+   }
 }
 
 /*
@@ -837,24 +834,24 @@ config_restore(void)
 int
 main(int argc, char **argv)
 {
-       find_device();
+   find_device();
 
-       const struct fd_perfcntr_group *groups;
-       groups = fd_perfcntrs((dev.chipid >> 24) * 100, &dev.ngroups);
-       if (!groups) {
-               errx(1, "no perfcntr support");
-       }
+   const struct fd_perfcntr_group *groups;
+   groups = fd_perfcntrs((dev.chipid >> 24) * 100, &dev.ngroups);
+   if (!groups) {
+      errx(1, "no perfcntr support");
+   }
 
-       dev.groups = calloc(dev.ngroups, sizeof(struct counter_group));
+   dev.groups = calloc(dev.ngroups, sizeof(struct counter_group));
 
-       setlocale(LC_NUMERIC, "en_US.UTF-8");
+   setlocale(LC_NUMERIC, "en_US.UTF-8");
 
-       setup_counter_groups(groups);
-       restore_counter_groups();
-       config_restore();
-       flush_ring();
+   setup_counter_groups(groups);
+   restore_counter_groups();
+   config_restore();
+   flush_ring();
 
-       main_ui();
+   main_ui();
 
-       return 0;
+   return 0;
 }
index 1c5e5e9..3f2cbdc 100644 (file)
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <arpa/inet.h>
 #include <fcntl.h>
 #include <ftw.h>
+#include <inttypes.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <stdint.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
 #include <unistd.h>
-#include <inttypes.h>
+#include <arpa/inet.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
 
 #include "util/macros.h"
 #include "util/os_file.h"
 #include "freedreno_dt.h"
 
 static struct {
-       char *dtnode;
-       int address_cells, size_cells;
-       uint64_t base;
-       uint32_t size;
-       uint32_t min_freq;
-       uint32_t max_freq;
+   char *dtnode;
+   int address_cells, size_cells;
+   uint64_t base;
+   uint32_t size;
+   uint32_t min_freq;
+   uint32_t max_freq;
 } dev;
 
-
 /*
  * code to find stuff in /proc/device-tree:
  *
@@ -61,54 +60,55 @@ static struct {
 /* Read the file "<dev.dtnode>/<node>" and return whatever os_read_file()
  * returns for it; the caller releases the buffer with free().  The size
  * reported by os_read_file() is not passed on.
  */
 static void *
 readdt(const char *node)
 {
-       char *path;
-       void *buf;
-       size_t sz;
+   char *path;
+   void *buf;
+   size_t sz;
 
-       (void) asprintf(&path, "%s/%s", dev.dtnode, node);
-       buf = os_read_file(path, &sz);
-       free(path);
+   (void)asprintf(&path, "%s/%s", dev.dtnode, node); /* result ignored */
+   buf = os_read_file(path, &sz);
+   free(path);
 
-       return buf;
+   return buf;
 }
 
 /* nftw() callback installed by find_freqs(): for each visited file named
  * "qcom,gpu-freq", read it, convert its first 32-bit word with ntohl() and
  * fold the value into dev.min_freq / dev.max_freq.  Always returns 0.
  *
  * NOTE(review): the os_read_file() result is dereferenced without a NULL
  * check, and sb / typeflag are unused.
  */
 static int
-find_freqs_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
+find_freqs_fn(const char *fpath, const struct stat *sb, int typeflag,
+              struct FTW *ftwbuf)
 {
-       const char *fname = fpath + ftwbuf->base;
-       size_t sz;
-
-       if (strcmp(fname, "qcom,gpu-freq") == 0) {
-               uint32_t *buf = (uint32_t *)os_read_file(fpath, &sz);
-               uint32_t freq = ntohl(buf[0]);
-               free(buf);
-               dev.max_freq = MAX2(dev.max_freq, freq);
-               dev.min_freq = MIN2(dev.min_freq, freq);
-       }
-
-       return 0;
+   const char *fname = fpath + ftwbuf->base; /* last path component */
+   size_t sz;
+
+   if (strcmp(fname, "qcom,gpu-freq") == 0) {
+      uint32_t *buf = (uint32_t *)os_read_file(fpath, &sz);
+      uint32_t freq = ntohl(buf[0]);
+      free(buf);
+      dev.max_freq = MAX2(dev.max_freq, freq);
+      dev.min_freq = MIN2(dev.min_freq, freq);
+   }
+
+   return 0;
 }
 
 /* Reset dev.min_freq / dev.max_freq and walk
  * "<dev.dtnode>/qcom,gpu-pwrlevels" with find_freqs_fn() to collect the
  * frequency range.  If the walk visits no "qcom,gpu-freq" file the values
  * stay at their reset state (min all-ones, max zero).
  */
 static void
 find_freqs(void)
 {
-       char *path;
+   char *path;
 
-       dev.min_freq = ~0;
-       dev.max_freq = 0;
+   dev.min_freq = ~0; /* all bits set, so MIN2() can only lower it */
+   dev.max_freq = 0;
 
-       (void) asprintf(&path, "%s/%s", dev.dtnode, "qcom,gpu-pwrlevels");
+   (void)asprintf(&path, "%s/%s", dev.dtnode, "qcom,gpu-pwrlevels");
 
-       nftw(path, find_freqs_fn, 64, 0);
+   nftw(path, find_freqs_fn, 64, 0); /* return value is ignored */
 
-       free(path);
+   free(path);
 }
 
-static const char * compatibles[] = {
-               "qcom,adreno-3xx",
-               "qcom,kgsl-3d0",
-               "amd,imageon",
-               "qcom,adreno",
+static const char *compatibles[] = { /* accepted by match_compatible() */
+   "qcom,adreno-3xx",
+   "qcom,kgsl-3d0",
+   "amd,imageon",
+   "qcom,adreno",
 };
 
 /**
@@ -118,137 +118,140 @@ static const char * compatibles[] = {
  *
  * would result in "qcom,adreno-630.2\0qcom,adreno\0"
  */
-static bool match_compatible(char *compatstrs, int sz)
+static bool
+match_compatible(char *compatstrs, int sz)
 {
-       while (sz > 0) {
-               char *compatible = compatstrs;
-
-               for (unsigned i = 0; i < ARRAY_SIZE(compatibles); i++) {
-                       if (strcmp(compatible, compatibles[i]) == 0) {
-                               return true;
-                       }
-               }
-
-               compatstrs += strlen(compatible) + 1;
-               sz -= strlen(compatible) + 1;
-       }
-       return false;
+   while (sz > 0) { /* one NUL-terminated string per iteration */
+      char *compatible = compatstrs;
+
+      for (unsigned i = 0; i < ARRAY_SIZE(compatibles); i++) {
+         if (strcmp(compatible, compatibles[i]) == 0) {
+            return true;
+         }
+      }
+
+      compatstrs += strlen(compatible) + 1; /* skip string and its NUL */
+      sz -= strlen(compatible) + 1;
+   }
+   return false; /* nothing in the list matched compatibles[] */
 }
 
 /* nftw() callback installed by find_device(): when a file named
  * "compatible" matches one of compatibles[], store the path of its
  * directory in dev.dtnode and read "#address-cells" / "#size-cells" from
  * the parent directory (both converted with ntohl()).  Returns 1 as soon
  * as dev.dtnode is set and 0 otherwise.
  *
  * NOTE(review): the os_read_file() results (str, val) are used without
  * NULL checks, and the inner "size_t sz" shadows the outer declaration.
  */
 static int
-find_device_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
+find_device_fn(const char *fpath, const struct stat *sb, int typeflag,
+               struct FTW *ftwbuf)
 {
-       const char *fname = fpath + ftwbuf->base;
-       size_t sz;
-
-       if (strcmp(fname, "compatible") == 0) {
-               char *str = os_read_file(fpath, &sz);
-               if (match_compatible(str, sz)) {
-                       int dlen = strlen(fpath) - strlen("/compatible");
-                       dev.dtnode = malloc(dlen + 1);
-                       memcpy(dev.dtnode, fpath, dlen);
-                       dev.dtnode[dlen] = '\0';
-                       printf("found dt node: %s\n", dev.dtnode);
-
-                       char buf[dlen + sizeof("/../#address-cells") + 1];
-                       size_t sz;
-                       int *val;
-
-                       sprintf(buf, "%s/../#address-cells", dev.dtnode);
-                       val = (int *)os_read_file(buf, &sz);
-                       dev.address_cells = ntohl(*val);
-                       free(val);
-
-                       sprintf(buf, "%s/../#size-cells", dev.dtnode);
-                       val = (int *)os_read_file(buf, &sz);
-                       dev.size_cells = ntohl(*val);
-                       free(val);
-
-                       printf("#address-cells=%d, #size-cells=%d\n",
-                                       dev.address_cells, dev.size_cells);
-               }
-               free(str);
-       }
-       if (dev.dtnode) {
-               /* we found it! */
-               return 1;
-       }
-       return 0;
+   const char *fname = fpath + ftwbuf->base; /* last path component */
+   size_t sz;
+
+   if (strcmp(fname, "compatible") == 0) {
+      char *str = os_read_file(fpath, &sz);
+      if (match_compatible(str, sz)) {
+         int dlen = strlen(fpath) - strlen("/compatible"); /* dir length */
+         dev.dtnode = malloc(dlen + 1);
+         memcpy(dev.dtnode, fpath, dlen);
+         dev.dtnode[dlen] = '\0';
+         printf("found dt node: %s\n", dev.dtnode);
+
+         char buf[dlen + sizeof("/../#address-cells") + 1]; /* longest path */
+         size_t sz;
+         int *val;
+
+         sprintf(buf, "%s/../#address-cells", dev.dtnode);
+         val = (int *)os_read_file(buf, &sz);
+         dev.address_cells = ntohl(*val);
+         free(val);
+
+         sprintf(buf, "%s/../#size-cells", dev.dtnode);
+         val = (int *)os_read_file(buf, &sz);
+         dev.size_cells = ntohl(*val);
+         free(val);
+
+         printf("#address-cells=%d, #size-cells=%d\n", dev.address_cells,
+                dev.size_cells);
+      }
+      free(str);
+   }
+   if (dev.dtnode) {
+      /* we found it! */
+      return 1;
+   }
+   return 0;
 }
 
 /* Find the matching node below /proc/device-tree/ and decode its "reg"
  * property into dev.base / dev.size (one or two 32-bit cells each, as
  * given by dev.address_cells / dev.size_cells), then collect the
  * frequency range via find_freqs().  Returns true right away when
  * dev.dtnode is already set; returns false when nftw() fails or no node
  * matched.
  *
  * NOTE(review): readdt("reg") is not NULL-checked, and a two-cell size is
  * stored into the 32-bit dev.size, so only its low 32 bits are kept.
  */
 static bool
 find_device(void)
 {
-       int ret;
-       uint32_t *buf, *b;
+   int ret;
+   uint32_t *buf, *b;
 
-       if (dev.dtnode)
-               return true;
+   if (dev.dtnode)
+      return true;
 
-       ret = nftw("/proc/device-tree/", find_device_fn, 64, 0);
-       if (ret < 0)
-               return false;
+   ret = nftw("/proc/device-tree/", find_device_fn, 64, 0);
+   if (ret < 0)
+      return false;
 
-       if (!dev.dtnode)
-               return false;
+   if (!dev.dtnode)
+      return false;
 
-       b = buf = readdt("reg");
+   b = buf = readdt("reg"); /* b keeps the start for free(), buf advances */
 
-       if (dev.address_cells == 2) {
-               uint32_t u[2] = { ntohl(buf[0]), ntohl(buf[1]) };
-               dev.base = (((uint64_t)u[0]) << 32) | u[1];
-               buf += 2;
-       } else {
-               dev.base = ntohl(buf[0]);
-               buf += 1;
-       }
+   if (dev.address_cells == 2) {
+      uint32_t u[2] = {ntohl(buf[0]), ntohl(buf[1])};
+      dev.base = (((uint64_t)u[0]) << 32) | u[1];
+      buf += 2;
+   } else {
+      dev.base = ntohl(buf[0]);
+      buf += 1;
+   }
 
-       if (dev.size_cells == 2) {
-               uint32_t u[2] = { ntohl(buf[0]), ntohl(buf[1]) };
-               dev.size = (((uint64_t)u[0]) << 32) | u[1];
-               buf += 2;
-       } else {
-               dev.size = ntohl(buf[0]);
-               buf += 1;
-       }
+   if (dev.size_cells == 2) {
+      uint32_t u[2] = {ntohl(buf[0]), ntohl(buf[1])};
+      dev.size = (((uint64_t)u[0]) << 32) | u[1];
+      buf += 2;
+   } else {
+      dev.size = ntohl(buf[0]);
+      buf += 1;
+   }
 
-       free(b);
+   free(b);
 
-       printf("i/o region at %08"PRIx64" (size: %x)\n", dev.base, dev.size);
+   printf("i/o region at %08" PRIx64 " (size: %x)\n", dev.base, dev.size);
 
-       find_freqs();
+   find_freqs();
 
-       printf("min_freq=%u, max_freq=%u\n", dev.min_freq, dev.max_freq);
+   printf("min_freq=%u, max_freq=%u\n", dev.min_freq, dev.max_freq);
 
-       return true;
+   return true;
 }
 
 /* Store dev.min_freq / dev.max_freq in *min_freq / *max_freq.  Returns
  * false, leaving both outputs untouched, when find_device() fails.
  */
 bool
 fd_dt_find_freqs(uint32_t *min_freq, uint32_t *max_freq)
 {
-       if (!find_device())
-               return false;
+   if (!find_device())
+      return false;
 
-       *min_freq = dev.min_freq;
-       *max_freq = dev.max_freq;
+   *min_freq = dev.min_freq;
+   *max_freq = dev.max_freq;
 
-       return true;
+   return true;
 }
 
 /* Map dev.size bytes of /dev/mem at offset dev.base (read/write, shared)
  * and return the mapping.  Returns NULL when find_device() fails,
  * /dev/mem cannot be opened or mmap() fails.  The descriptor is closed
  * right after the mmap() call.
  */
 void *
 fd_dt_find_io(void)
 {
-       if (!find_device())
-               return NULL;
+   if (!find_device())
+      return NULL;
 
-       int fd = open("/dev/mem", O_RDWR | O_SYNC);
-       if (fd < 0)
-               return NULL;
+   int fd = open("/dev/mem", O_RDWR | O_SYNC);
+   if (fd < 0)
+      return NULL;
 
-       void *io = mmap(0, dev.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, dev.base);
-       close(fd);
-       if (io == MAP_FAILED)
-               return NULL;
+   void *io =
+      mmap(0, dev.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, dev.base);
+   close(fd);
+   if (io == MAP_FAILED)
+      return NULL;
 
-       return io;
+   return io;
 }
index c08916e..ad03fdf 100644 (file)
@@ -39,7 +39,7 @@ extern "C" {
  */
 
 bool fd_dt_find_freqs(uint32_t *min_freq, uint32_t *max_freq);
-void * fd_dt_find_io(void);
+void *fd_dt_find_io(void);
 
 #ifdef __cplusplus
 } /* end of extern "C" */
index 343d512..e3a8807 100644 (file)
@@ -40,18 +40,18 @@ extern const unsigned a6xx_num_perfcntr_groups;
 /* Return the perfcntr group table for gpu_id and store its length in
  * *count: ids 200-299, 500-599 and 600-699 select the a2xx, a5xx and a6xx
  * tables; any other id returns NULL with *count set to 0.  The
  * "case lo ... hi" labels are a GNU C extension.
  */
 const struct fd_perfcntr_group *
 fd_perfcntrs(unsigned gpu_id, unsigned *count)
 {
-       switch (gpu_id) {
-       case 200 ... 299:
-               *count = a2xx_num_perfcntr_groups;
-               return a2xx_perfcntr_groups;
-       case 500 ... 599:
-               *count = a5xx_num_perfcntr_groups;
-               return a5xx_perfcntr_groups;
-       case 600 ... 699:
-               *count = a6xx_num_perfcntr_groups;
-               return a6xx_perfcntr_groups;
-       default:
-               *count = 0;
-               return NULL;
-       }
+   switch (gpu_id) {
+   case 200 ... 299:
+      *count = a2xx_num_perfcntr_groups;
+      return a2xx_perfcntr_groups;
+   case 500 ... 599:
+      *count = a5xx_num_perfcntr_groups;
+      return a5xx_perfcntr_groups;
+   case 600 ... 699:
+      *count = a6xx_num_perfcntr_groups;
+      return a6xx_perfcntr_groups;
+   default:
+      *count = 0;
+      return NULL;
+   }
 }
index bd764e3..86c2be9 100644 (file)
@@ -42,91 +42,82 @@ extern "C" {
 
 /* Describes a single counter (see the COUNTER()/COUNTER2() initializers): */
 struct fd_perfcntr_counter {
-       /* offset of the select register to choose what to count: */
-       unsigned select_reg;
-       /* offset of the lo/hi 32b to read current counter value: */
-       unsigned counter_reg_lo;
-       unsigned counter_reg_hi;
-       /* Optional, most counters don't have enable/clear registers: */
-       unsigned enable;
-       unsigned clear;
+   /* offset of the select register to choose what to count: */
+   unsigned select_reg;
+   /* offset of the lo/hi 32b to read current counter value: */
+   unsigned counter_reg_lo;
+   unsigned counter_reg_hi;
+   /* Optional, most counters don't have enable/clear registers: */
+   unsigned enable; /* only COUNTER2() initializes enable and clear */
+   unsigned clear;
 };
 
-
 /* Value type of a countable, stored in fd_perfcntr_countable::query_type.
  * COUNTABLE() forms the enumerator by pasting its _query_type argument
  * onto the FD_PERFCNTR_TYPE_ prefix.
  */
 enum fd_perfcntr_type {
-       FD_PERFCNTR_TYPE_UINT64,
-       FD_PERFCNTR_TYPE_UINT,
-       FD_PERFCNTR_TYPE_FLOAT,
-       FD_PERFCNTR_TYPE_PERCENTAGE,
-       FD_PERFCNTR_TYPE_BYTES,
-       FD_PERFCNTR_TYPE_MICROSECONDS,
-       FD_PERFCNTR_TYPE_HZ,
-       FD_PERFCNTR_TYPE_DBM,
-       FD_PERFCNTR_TYPE_TEMPERATURE,
-       FD_PERFCNTR_TYPE_VOLTS,
-       FD_PERFCNTR_TYPE_AMPS,
-       FD_PERFCNTR_TYPE_WATTS,
+   FD_PERFCNTR_TYPE_UINT64,
+   FD_PERFCNTR_TYPE_UINT,
+   FD_PERFCNTR_TYPE_FLOAT,
+   FD_PERFCNTR_TYPE_PERCENTAGE,
+   FD_PERFCNTR_TYPE_BYTES,
+   FD_PERFCNTR_TYPE_MICROSECONDS,
+   FD_PERFCNTR_TYPE_HZ,
+   FD_PERFCNTR_TYPE_DBM,
+   FD_PERFCNTR_TYPE_TEMPERATURE,
+   FD_PERFCNTR_TYPE_VOLTS,
+   FD_PERFCNTR_TYPE_AMPS,
+   FD_PERFCNTR_TYPE_WATTS,
 };
 
 /* Whether an average value per frame or a cumulative value should be
  * displayed.  Stored in fd_perfcntr_countable::result_type; COUNTABLE()
  * forms the enumerator by pasting its _result_type argument onto the
  * FD_PERFCNTR_RESULT_TYPE_ prefix.
  */
 enum fd_perfcntr_result_type {
-       FD_PERFCNTR_RESULT_TYPE_AVERAGE,
-       FD_PERFCNTR_RESULT_TYPE_CUMULATIVE,
+   FD_PERFCNTR_RESULT_TYPE_AVERAGE,
+   FD_PERFCNTR_RESULT_TYPE_CUMULATIVE,
 };
 
-
 /* Describes a single countable (see the COUNTABLE() initializer): */
 struct fd_perfcntr_countable {
-       const char *name;
-       /* selector register enum value to select this countable: */
-       unsigned selector;
+   const char *name; /* COUNTABLE() uses the stringified selector */
+   /* selector register enum value to select this countable: */
+   unsigned selector;
 
-       /* description of the countable: */
-       enum fd_perfcntr_type query_type;
-       enum fd_perfcntr_result_type result_type;
+   /* description of the countable: */
+   enum fd_perfcntr_type query_type;
+   enum fd_perfcntr_result_type result_type;
 };
 
 /* Describes an entire counter group (see the GROUP() initializer): */
 struct fd_perfcntr_group {
-       const char *name;
-       unsigned num_counters;
-       const struct fd_perfcntr_counter *counters;
-       unsigned num_countables;
-       const struct fd_perfcntr_countable *countables;
+   const char *name;
+   unsigned num_counters; /* GROUP() sets this to ARRAY_SIZE(counters) */
+   const struct fd_perfcntr_counter *counters;
+   unsigned num_countables; /* GROUP() sets this to ARRAY_SIZE(countables) */
+   const struct fd_perfcntr_countable *countables;
 };
 
 const struct fd_perfcntr_group *fd_perfcntrs(unsigned gpu_id, unsigned *count);
 
-#define COUNTER(_sel, _lo, _hi) {  \
-       .select_reg = REG(_sel),       \
-       .counter_reg_lo = REG(_lo),    \
-       .counter_reg_hi = REG(_hi),    \
-}
-
-#define COUNTER2(_sel, _lo, _hi, _en, _clr) { \
-       .select_reg     = REG(_sel),  \
-       .counter_reg_lo = REG(_lo),   \
-       .counter_reg_hi = REG(_hi),   \
-       .enable         = REG(_en),   \
-       .clear          = REG(_clr),  \
-}
-
-#define COUNTABLE(_selector, _query_type, _result_type) {            \
-       .name        = #_selector,                                       \
-       .selector    = _selector,                                        \
-       .query_type  = FD_PERFCNTR_TYPE_ ## _query_type,                 \
-       .result_type = FD_PERFCNTR_RESULT_TYPE_ ## _result_type,         \
-}
-
-#define GROUP(_name, _counters, _countables) {   \
-       .name           = _name,                     \
-       .num_counters   = ARRAY_SIZE(_counters),     \
-       .counters       = _counters,                 \
-       .num_countables = ARRAY_SIZE(_countables),   \
-       .countables     = _countables,               \
-}
 /* Designated-initializer helpers for the perfcntr tables:
  *  - COUNTER() fills select_reg/counter_reg_lo/counter_reg_hi and leaves
  *    enable/clear unset; COUNTER2() sets those two as well.
  *  - COUNTABLE() uses the stringified selector as the name and pastes the
  *    type arguments onto the FD_PERFCNTR_TYPE_ / FD_PERFCNTR_RESULT_TYPE_
  *    prefixes.
  *  - GROUP() derives both element counts with ARRAY_SIZE(), so it must be
  *    given real arrays, not pointers.
  * REG() is not defined in the visible part of this header - presumably it
  * is supplied by the file that uses these macros (TODO confirm).
  */
+#define COUNTER(_sel, _lo, _hi) {                                              \
+      .select_reg = REG(_sel), .counter_reg_lo = REG(_lo),                     \
+      .counter_reg_hi = REG(_hi),                                              \
+   }
+
+#define COUNTER2(_sel, _lo, _hi, _en, _clr) {                                  \
+      .select_reg = REG(_sel), .counter_reg_lo = REG(_lo),                     \
+      .counter_reg_hi = REG(_hi), .enable = REG(_en), .clear = REG(_clr),      \
+   }
+
+#define COUNTABLE(_selector, _query_type, _result_type) {                      \
+      .name = #_selector, .selector = _selector,                               \
+      .query_type = FD_PERFCNTR_TYPE_##_query_type,                            \
+      .result_type = FD_PERFCNTR_RESULT_TYPE_##_result_type,                   \
+   }
+
+#define GROUP(_name, _counters, _countables) {                                 \
+      .name = _name, .num_counters = ARRAY_SIZE(_counters),                    \
+      .counters = _counters, .num_countables = ARRAY_SIZE(_countables),        \
+      .countables = _countables,                                               \
+   }
 
 #ifdef __cplusplus
 } /* end of extern "C" */