From 8d437b21944327ac6b6320137133d1e35f6eacc2 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 6 Aug 2020 10:03:51 -0700 Subject: [PATCH] freedreno/ir3: add more disasm stats Add tracking for # of instructions per category, similar to the last patch. Also add a few other shader-db stats that were missing on the disasm side, to make it easier to compare to shaders from cmdstream traces. Signed-off-by: Rob Clark Part-of: --- ...w.indexed.indirect_draw_count.triangle_list.log | 18 ++++-- src/freedreno/.gitlab-ci/reference/fd-clouds.log | 27 ++++++--- .../.gitlab-ci/reference/glxgears-a420.log | 66 ++++++++++++++-------- src/freedreno/common/disasm.h | 4 ++ src/freedreno/ir3/disasm-a3xx.c | 50 ++++++++++++---- 5 files changed, 117 insertions(+), 48 deletions(-) diff --git a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log index 2471e5b..be5a809 100644 --- a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log +++ b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log @@ -821,7 +821,8 @@ t4 write SP_VS_OBJ_START_LO (a81c) - output (half): (cnt=0, max=0) (estimated) - output (full): 8-11 (cnt=4, max=11) (estimated) - - shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 half, 3 full + - shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 last-baryf, 0 half, 3 full + - shaderdb: 8 cat0, 0 cat1, 1 cat2, 4 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 0 (ss), 0 (sy) 00000000010541a4: 0000: 48a81c02 01054000 00000000 t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords) @@ -848,7 +849,8 @@ t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): 8-11 (cnt=4, max=11) (estimated) - - shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 half, 3 full + - shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 last-baryf, 0 half, 3 full + - shaderdb: 8 cat0, 0 cat1, 1 cat2, 4 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 0 (ss), 0 (sy) 00000000010541b0: 0000: 70328003 00620000 01054000 00000000 t7 opcode: CP_LOAD_STATE6_GEOM (32) (8 dwords) @@ -913,7 +915,8 @@ t4 write SP_FS_OBJ_START_LO (a983) - output (half): (cnt=0, max=0) (estimated) - output (full): 2-5 (cnt=4, max=5) (estimated) - - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 half, 2 full + - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 3 last-baryf, 0 half, 2 full + - shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 0 (ss), 0 (sy) 000000000105422c: 0000: 40a98302 01054080 00000000 t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords) @@ -939,7 +942,8 @@ t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): 2-5 (cnt=4, max=5) (estimated) - - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 half, 2 full + - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 3 last-baryf, 0 half, 2 full + - shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 0 (ss), 0 (sy) 0000000001054238: 0000: 70348003 00720000 01054080 00000000 t4 write SP_CS_CONFIG (a9bb) @@ -1507,7 +1511,8 @@ t7 opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): 8-11 (cnt=4, max=11) (estimated) - - shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 half, 3 full + - shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 last-baryf, 0 half, 3 full + - shaderdb: 8 cat0, 0 cat1, 1 cat2, 4 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 0 (ss), 0 (sy) !+ 00000100 SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 } !+ 00000001 SP_VS_INSTRLEN: 1 @@ -1542,7 +1547,8 @@ t7 opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): 2-5 (cnt=4, max=5) (estimated) - - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 half, 2 full + - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 3 last-baryf, 0 half, 2 full + - shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 0 (ss), 0 (sy) !+ 00000100 SP_BLEND_CNTL: { UNK8 } + 00000000 SP_SRGB_CNTL: { 0 } diff --git a/src/freedreno/.gitlab-ci/reference/fd-clouds.log b/src/freedreno/.gitlab-ci/reference/fd-clouds.log index 9498629..89ae040 100644 --- a/src/freedreno/.gitlab-ci/reference/fd-clouds.log +++ b/src/freedreno/.gitlab-ci/reference/fd-clouds.log @@ -644,7 +644,8 @@ t4 write SP_VS_OBJ_START_LO (a81c) - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full + - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full + - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 0 (ss), 0 (sy) 0000000001121038: 0000: 48a81c02 01011000 00000000 t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords) @@ -666,7 +667,8 @@ t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full + - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full + - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 0 (ss), 0 (sy) 0000000001121044: 0000: 70328003 00620000 01011000 00000000 t4 write VPC_VAR[0].DISABLE (9212) @@ -1114,7 +1116,8 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full + - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full + - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 0 (ss), 0 (sy) !+ 00000100 SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 } !+ 00000001 SP_VS_INSTRLEN: 1 @@ -1959,7 +1962,8 @@ t4 write SP_VS_OBJ_START_LO (a81c) - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full + - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full + - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 0 (ss), 0 (sy) 0000000001120038: 0000: 48a81c02 01012000 00000000 t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords) @@ -1981,7 +1985,8 @@ t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full + - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full + - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 0 (ss), 0 (sy) 0000000001120044: 0000: 70328003 00620000 01012000 00000000 t4 write VPC_VAR[0].DISABLE (9212) @@ -3502,7 +3507,8 @@ t4 write SP_FS_OBJ_START_LO (a983) - output (half): (cnt=0, max=0) (estimated) - output (full): 4-7 (cnt=4, max=7) (estimated) - - shaderdb: 2414 instructions, 1355 nops, 1059 non-nops, (1406 instlen), 0 half, 19 full + - shaderdb: 2414 instructions, 1114 nops, 1300 non-nops, (1406 instlen), 0 last-baryf, 0 half, 19 full + - shaderdb: 1120 cat0, 48 cat1, 551 cat2, 512 cat3, 183 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 140 (ss), 0 (sy) 0000000001120158: 0000: 40a98302 01013000 00000000 t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords) @@ -4925,7 +4931,8 @@ t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): 4-7 (cnt=4, max=7) (estimated) - - shaderdb: 2414 instructions, 1355 nops, 1059 non-nops, (1406 instlen), 0 half, 19 full + - shaderdb: 2414 instructions, 1114 nops, 1300 non-nops, (1406 instlen), 0 last-baryf, 0 half, 19 full + - shaderdb: 1120 cat0, 48 cat1, 551 cat2, 512 cat3, 183 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 140 (ss), 0 (sy) 0000000001120164: 0000: 70348003 16320000 01013000 00000000 t4 write VFD_CONTROL_1 (a001) @@ -5339,7 +5346,8 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full + - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full + - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 0 (ss), 0 (sy) + 00000100 SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 } + 00000001 SP_VS_INSTRLEN: 1 @@ -6777,7 +6785,8 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): 4-7 (cnt=4, max=7) (estimated) - - shaderdb: 2414 instructions, 1355 nops, 1059 non-nops, (1406 instlen), 0 half, 19 full + - shaderdb: 2414 instructions, 1114 nops, 1300 non-nops, (1406 instlen), 0 last-baryf, 0 half, 19 full + - shaderdb: 1120 cat0, 48 cat1, 551 cat2, 512 cat3, 183 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 140 (ss), 0 (sy) !+ 00000100 SP_BLEND_CNTL: { UNK8 } + fcfcfc00 SP_FS_OUTPUT_CNTL0: { DEPTH_REGID = r63.x | SAMPMASK_REGID = r63.x | STENCILREF_REGID = r63.x } diff --git a/src/freedreno/.gitlab-ci/reference/glxgears-a420.log b/src/freedreno/.gitlab-ci/reference/glxgears-a420.log index 2abe7e8..efed6fc 100644 --- a/src/freedreno/.gitlab-ci/reference/glxgears-a420.log +++ b/src/freedreno/.gitlab-ci/reference/glxgears-a420.log @@ -433,7 +433,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full + - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full + - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 0 (ss), 0 (sy) 109ce1f0: 0000: c0213000 00600000 00000000 00000000 03000000 00000000 00000000 00000000 * @@ -458,7 +459,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): 0-3 (cnt=4, max=3) (estimated) - - shaderdb: 9 instructions, 8 nops, 1 non-nops, (9 instlen), 0 half, 1 full + - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 last-baryf, 0 half, 1 full + - shaderdb: 5 cat0, 4 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 0 (ss), 0 (sy) 109ce27c: 0000: c0213000 00700000 00000000 00000000 20244000 00000001 20244001 00000002 109ce29c: 0020: 20244002 00000003 20244003 00000000 03000000 00000000 00000000 00000000 @@ -1045,7 +1047,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): 6-13 (cnt=8, max=13) (estimated) - - shaderdb: 74 instructions, 38 nops, 36 non-nops, (61 instlen), 0 half, 4 full + - shaderdb: 74 instructions, 27 nops, 47 non-nops, (61 instlen), 0 last-baryf, 0 half, 4 full + - shaderdb: 28 cat0, 8 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 1 (ss), 0 (sy) 109ce66c: 0000: c0813000 01200000 00000000 10000002 40700000 10030002 40700001 00001004 109ce68c: 0020: 63818000 00011007 63818001 00001008 63820000 0001100b 63820001 0000100c @@ -1086,7 +1089,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full + - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 5 last-baryf, 0 half, 1 full + - shaderdb: 6 cat0, 0 cat1, 1 cat2, 0 cat3, 0 cat4, 0 cat5, 4 cat6, 0 cat7 - shaderdb: 1 (ss), 0 (sy) 109ce878: 0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002 109ce898: 0020: c7c60001 01c00004 c7c60002 01c00006 c7c60003 00002000 473090fc 00000000 @@ -1675,7 +1679,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): 10-17 (cnt=8, max=17) (estimated) - - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full + - shaderdb: 67 instructions, 23 nops, 44 non-nops, (56 instlen), 0 last-baryf, 0 half, 5 full + - shaderdb: 24 cat0, 5 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 1 (ss), 0 (sy) 109cee34: 0000: c0813000 01200000 00000000 10000002 40700000 10030002 40700001 00001004 109cee54: 0020: 63818000 00011007 63818001 00001008 63820000 0001100b 63820001 0000100c @@ -1715,7 +1720,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full + - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 5 last-baryf, 0 half, 1 full + - shaderdb: 6 cat0, 0 cat1, 1 cat2, 0 cat3, 0 cat4, 0 cat5, 4 cat6, 0 cat7 - shaderdb: 1 (ss), 0 (sy) 109cf040: 0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002 109cf060: 0020: c7c60001 01c00004 c7c60002 01c00006 c7c60003 00002000 473090fc 00000000 @@ -2106,7 +2112,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): 10-17 (cnt=8, max=17) (estimated) - - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full + - shaderdb: 67 instructions, 23 nops, 44 non-nops, (56 instlen), 0 last-baryf, 0 half, 5 full + - shaderdb: 24 cat0, 5 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 1 (ss), 0 (sy) 109cf40c: 0000: c0813000 01200000 00000000 10000002 40700000 10030002 40700001 00001004 109cf42c: 0020: 63818000 00011007 63818001 00001008 63820000 0001100b 63820001 0000100c @@ -2144,7 +2151,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): 2-5 (cnt=4, max=5) (estimated) - - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 half, 2 full + - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 3 last-baryf, 0 half, 2 full + - shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 0 (ss), 0 (sy) 109cf618: 0000: c0213000 00700000 00000000 00002000 47300002 00002001 47300003 00002002 109cf638: 0020: 47300004 00002003 47308005 00000000 03000000 00000000 00000000 00000000 @@ -2498,7 +2506,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): 6-13 (cnt=8, max=13) (estimated) - - shaderdb: 74 instructions, 38 nops, 36 non-nops, (61 instlen), 0 half, 4 full + - shaderdb: 74 instructions, 27 nops, 47 non-nops, (61 instlen), 0 last-baryf, 0 half, 4 full + - shaderdb: 28 cat0, 8 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 1 (ss), 0 (sy) 109cf96c: 0000: c0813000 01200000 00000000 10000002 40700000 10030002 40700001 00001004 109cf98c: 0020: 63818000 00011007 63818001 00001008 63820000 0001100b 63820001 0000100c @@ -2539,7 +2548,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full + - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 5 last-baryf, 0 half, 1 full + - shaderdb: 6 cat0, 0 cat1, 1 cat2, 0 cat3, 0 cat4, 0 cat5, 4 cat6, 0 cat7 - shaderdb: 1 (ss), 0 (sy) 109cfb78: 0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002 109cfb98: 0020: c7c60001 01c00004 c7c60002 01c00006 c7c60003 00002000 473090fc 00000000 @@ -3051,7 +3061,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): 10-17 (cnt=8, max=17) (estimated) - - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full + - shaderdb: 67 instructions, 23 nops, 44 non-nops, (56 instlen), 0 last-baryf, 0 half, 5 full + - shaderdb: 24 cat0, 5 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 1 (ss), 0 (sy) 109d00b4: 0000: c0813000 01200000 00000000 10000002 40700000 10030002 40700001 00001004 109d00d4: 0020: 63818000 00011007 63818001 00001008 63820000 0001100b 63820001 0000100c @@ -3091,7 +3102,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full + - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 5 last-baryf, 0 half, 1 full + - shaderdb: 6 cat0, 0 cat1, 1 cat2, 0 cat3, 0 cat4, 0 cat5, 4 cat6, 0 cat7 - shaderdb: 1 (ss), 0 (sy) 109d02c0: 0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002 109d02e0: 0020: c7c60001 01c00004 c7c60002 01c00006 c7c60003 00002000 473090fc 00000000 @@ -3482,7 +3494,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): 10-17 (cnt=8, max=17) (estimated) - - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full + - shaderdb: 67 instructions, 23 nops, 44 non-nops, (56 instlen), 0 last-baryf, 0 half, 5 full + - shaderdb: 24 cat0, 5 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 1 (ss), 0 (sy) 109d068c: 0000: c0813000 01200000 00000000 10000002 40700000 10030002 40700001 00001004 109d06ac: 0020: 63818000 00011007 63818001 00001008 63820000 0001100b 63820001 0000100c @@ -3520,7 +3533,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): 2-5 (cnt=4, max=5) (estimated) - - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 half, 2 full + - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 3 last-baryf, 0 half, 2 full + - shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 0 (ss), 0 (sy) 109d0898: 0000: c0213000 00700000 00000000 00002000 47300002 00002001 47300003 00002002 109d08b8: 0020: 47300004 00002003 47308005 00000000 03000000 00000000 00000000 00000000 @@ -3874,7 +3888,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): 6-13 (cnt=8, max=13) (estimated) - - shaderdb: 74 instructions, 38 nops, 36 non-nops, (61 instlen), 0 half, 4 full + - shaderdb: 74 instructions, 27 nops, 47 non-nops, (61 instlen), 0 last-baryf, 0 half, 4 full + - shaderdb: 28 cat0, 8 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 1 (ss), 0 (sy) 109d0bec: 0000: c0813000 01200000 00000000 10000002 40700000 10030002 40700001 00001004 109d0c0c: 0020: 63818000 00011007 63818001 00001008 63820000 0001100b 63820001 0000100c @@ -3915,7 +3930,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full + - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 5 last-baryf, 0 half, 1 full + - shaderdb: 6 cat0, 0 cat1, 1 cat2, 0 cat3, 0 cat4, 0 cat5, 4 cat6, 0 cat7 - shaderdb: 1 (ss), 0 (sy) 109d0df8: 0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002 109d0e18: 0020: c7c60001 01c00004 c7c60002 01c00006 c7c60003 00002000 473090fc 00000000 @@ -4427,7 +4443,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): 10-17 (cnt=8, max=17) (estimated) - - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full + - shaderdb: 67 instructions, 23 nops, 44 non-nops, (56 instlen), 0 last-baryf, 0 half, 5 full + - shaderdb: 24 cat0, 5 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 1 (ss), 0 (sy) 109d1334: 0000: c0813000 01200000 00000000 10000002 40700000 10030002 40700001 00001004 109d1354: 0020: 63818000 00011007 63818001 00001008 63820000 0001100b 63820001 0000100c @@ -4467,7 +4484,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 0 half, 1 full + - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 5 last-baryf, 0 half, 1 full + - shaderdb: 6 cat0, 0 cat1, 1 cat2, 0 cat3, 0 cat4, 0 cat5, 4 cat6, 0 cat7 - shaderdb: 1 (ss), 0 (sy) 109d1540: 0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002 109d1560: 0020: c7c60001 01c00004 c7c60002 01c00006 c7c60003 00002000 473090fc 00000000 @@ -4858,7 +4876,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (131 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): 10-17 (cnt=8, max=17) (estimated) - - shaderdb: 67 instructions, 31 nops, 36 non-nops, (56 instlen), 0 half, 5 full + - shaderdb: 67 instructions, 23 nops, 44 non-nops, (56 instlen), 0 last-baryf, 0 half, 5 full + - shaderdb: 24 cat0, 5 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 1 (ss), 0 (sy) 109d190c: 0000: c0813000 01200000 00000000 10000002 40700000 10030002 40700001 00001004 109d192c: 0020: 63818000 00011007 63818001 00001008 63820000 0001100b 63820001 0000100c @@ -4896,7 +4915,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): 2-5 (cnt=4, max=5) (estimated) - - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 half, 2 full + - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 3 last-baryf, 0 half, 2 full + - shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 0 (ss), 0 (sy) 109d1b18: 0000: c0213000 00700000 00000000 00002000 47300002 00002001 47300003 00002002 109d1b38: 0020: 47300004 00002003 47308005 00000000 03000000 00000000 00000000 00000000 @@ -5200,7 +5220,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): (cnt=0, max=0) (estimated) - - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 half, 0 full + - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full + - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 0 (ss), 0 (sy) 108ce4b0: 0000: c0213000 00600000 00000000 00000000 03000000 00000000 00000000 00000000 * @@ -5225,7 +5246,8 @@ t3 opcode: CP_LOAD_STATE4 (30) (35 dwords) - output (half): (cnt=0, max=0) (estimated) - output (full): 0-3 (cnt=4, max=3) (estimated) - - shaderdb: 9 instructions, 8 nops, 1 non-nops, (9 instlen), 0 half, 1 full + - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 last-baryf, 0 half, 1 full + - shaderdb: 5 cat0, 4 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7 - shaderdb: 0 (ss), 0 (sy) 108ce53c: 0000: c0213000 00700000 00000000 00000000 20244000 00000001 20244001 00000002 108ce55c: 0020: 20244002 00000003 20244003 00000000 03000000 00000000 00000000 00000000 diff --git a/src/freedreno/common/disasm.h b/src/freedreno/common/disasm.h index 1a2993e..0e1d850 100644 --- a/src/freedreno/common/disasm.h +++ b/src/freedreno/common/disasm.h @@ -44,6 +44,10 @@ struct shader_stats { int nops; int ss, sy; int constlen; + uint16_t mov_count; + uint16_t cov_count; + uint16_t last_baryf; + uint16_t instrs_per_cat[8]; }; int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, gl_shader_stage type); diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c index b841631..2555c73 100644 --- a/src/freedreno/ir3/disasm-a3xx.c +++ b/src/freedreno/ir3/disasm-a3xx.c @@ -277,10 +277,21 @@ static void print_reg_stats(struct disasm_ctx *ctx) // Note this count of instructions includes rptN, which matches // up to how mesa prints this: fprintf(ctx->out, "%s- shaderdb: %d instructions, %d nops, %d non-nops, " - "(%d instlen), %d half, %d full\n", + "(%d instlen), %u last-baryf, %d half, %d full\n", levels[ctx->level], ctx->stats->instructions, ctx->stats->nops, ctx->stats->instructions - ctx->stats->nops, ctx->stats->instlen, - halfreg, fullreg); + ctx->stats->last_baryf, halfreg, fullreg); + fprintf(ctx->out, "%s- shaderdb: %u cat0, %u cat1, %u cat2, %u cat3, " + "%u cat4, %u cat5, %u cat6, %u cat7\n", + levels[ctx->level], + ctx->stats->instrs_per_cat[0], + ctx->stats->instrs_per_cat[1], + ctx->stats->instrs_per_cat[2], + ctx->stats->instrs_per_cat[3], + ctx->stats->instrs_per_cat[4], + ctx->stats->instrs_per_cat[5], + ctx->stats->instrs_per_cat[6], + ctx->stats->instrs_per_cat[7]); fprintf(ctx->out, "%s- shaderdb: %d (ss), %d (sy)\n", levels[ctx->level], ctx->stats->ss, ctx->stats->sy); } @@ -1529,7 +1540,7 @@ static void print_single_instr(struct disasm_ctx *ctx, instr_t *instr) static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n) { instr_t *instr = (instr_t *)dwords; - uint32_t opc = instr_opc(instr, ctx->gpu_id); + opc_t opc = _OPC(instr->opc_cat, instr_opc(instr, ctx->gpu_id)); unsigned nop = 0; unsigned cycles = ctx->stats->instructions; @@ -1538,15 +1549,18 @@ static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n) instr->opc_cat, n, cycles++, dwords[1], dwords[0]); } - /* NOTE: order flags are printed is a bit fugly.. but for now I - * try to match the order in llvm-a3xx disassembler for easy - * diff'ing.. - */ + if (opc == OPC_BARY_F) + ctx->stats->last_baryf = ctx->stats->instructions; ctx->repeat = instr_repeat(instr); ctx->stats->instructions += 1 + ctx->repeat; ctx->stats->instlen++; + /* NOTE: order flags are printed is a bit fugly.. but for now I + * try to match the order in llvm-a3xx disassembler for easy + * diff'ing.. + */ + if (instr->sync) { fprintf(ctx->out, "(sy)"); ctx->stats->sy++; @@ -1567,16 +1581,30 @@ static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n) nop = (instr->cat2.src2_r * 2) + instr->cat2.src1_r; else if ((instr->opc_cat == 3) && (instr->cat3.src1_r || instr->cat3.src2_r)) nop = (instr->cat3.src2_r * 2) + instr->cat3.src1_r; - ctx->stats->instructions += nop; - ctx->stats->nops += nop; - if (opc == OPC_NOP) - ctx->stats->nops += 1 + ctx->repeat; if (nop) fprintf(ctx->out, "(nop%d) ", nop); if (instr->ul && ((2 <= instr->opc_cat) && (instr->opc_cat <= 4))) fprintf(ctx->out, "(ul)"); + ctx->stats->instructions += nop; + ctx->stats->nops += nop; + if (opc == OPC_NOP) { + ctx->stats->nops += 1 + ctx->repeat; + ctx->stats->instrs_per_cat[0] += 1 + ctx->repeat; + } else { + ctx->stats->instrs_per_cat[instr->opc_cat] += 1 + ctx->repeat; + ctx->stats->instrs_per_cat[0] += nop; + } + + if (opc == OPC_MOV) { + if (instr->cat1.src_type == instr->cat1.dst_type) { + ctx->stats->mov_count += 1 + ctx->repeat; + } else { + ctx->stats->cov_count += 1 + ctx->repeat; + } + } + print_single_instr(ctx, instr); fprintf(ctx->out, "\n"); -- 2.7.4