freedreno/ir3: Better sstall estimation
author Rob Clark <robdclark@chromium.org>
Sat, 9 Jan 2021 20:12:37 +0000 (12:12 -0800)
committer Marge Bot <eric+marge@anholt.net>
Wed, 13 Jan 2021 18:32:47 +0000 (18:32 +0000)
1) Take into account repeat/nop cycles
2) Clear sfu_delay after an (ss) sync

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7997>

src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log
src/freedreno/.gitlab-ci/reference/fd-clouds.log
src/freedreno/.gitlab-ci/reference/glxgears-a420.log
src/freedreno/ir3/ir3.c

index 8900e61..3c4e006 100644 (file)
@@ -816,13 +816,13 @@ t4                                        write SP_VS_OBJ_START_LO (a81c)
                                                        - used (full): 4-11 (cnt=8, max=11)
                                                        - input (half): 8-19 (cnt=12, max=19)
                                                        - input (full): 4-9 (cnt=6, max=9)
-                                                       - max const: 5
-
                                                        - output (half): 16-23 (cnt=8, max=23)  (estimated)
                                                        - output (full): 8-11 (cnt=4, max=11)  (estimated)
-                                                       - shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 last-baryf, 0 half, 3 full
+
+                                                       - shaderdb: 13 instr, 7 nops, 6 non-nops, 0 mov, 0 cov
+                                                       - shaderdb: 0 last-baryf, 0 half, 3 full, 2 constlen
                                                        - shaderdb: 8 cat0, 0 cat1, 1 cat2, 4 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-                                                       - shaderdb: 0 (ss), 0 (sy)
+                                                       - shaderdb: 0 sstall, 0 (ss), 0 (sy)
 00000000010541a4:                                      0000: 48a81c02 01054000 00000000
 t7                                     opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
                                                { DST_OFF = 0 | STATE_TYPE = ST6_SHADER | STATE_SRC = SS6_INDIRECT | STATE_BLOCK = SB6_VS_SHADER | NUM_UNIT = 1 }
@@ -843,13 +843,13 @@ t7                                        opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
                                                - used (full): 4-11 (cnt=8, max=11)
                                                - input (half): 8-19 (cnt=12, max=19)
                                                - input (full): 4-9 (cnt=6, max=9)
-                                               - max const: 5
-
                                                - output (half): 16-23 (cnt=8, max=23)  (estimated)
                                                - output (full): 8-11 (cnt=4, max=11)  (estimated)
-                                               - shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 last-baryf, 0 half, 3 full
+
+                                               - shaderdb: 13 instr, 7 nops, 6 non-nops, 0 mov, 0 cov
+                                               - shaderdb: 0 last-baryf, 0 half, 3 full, 2 constlen
                                                - shaderdb: 8 cat0, 0 cat1, 1 cat2, 4 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-                                               - shaderdb: 0 (ss), 0 (sy)
+                                               - shaderdb: 0 sstall, 0 (ss), 0 (sy)
 00000000010541b0:                                      0000: 70328003 00620000 01054000 00000000
 t7                                     opcode: CP_LOAD_STATE6_GEOM (32) (8 dwords)
                                                { DST_OFF = 1 | STATE_TYPE = ST6_CONSTANTS | STATE_SRC = SS6_DIRECT | STATE_BLOCK = SB6_VS_SHADER | NUM_UNIT = 1 }
@@ -908,13 +908,13 @@ t4                                        write SP_FS_OBJ_START_LO (a983)
                                                        - used (full): 0 2-5 (cnt=5, max=5)
                                                        - input (half): 0-1 (cnt=2, max=1)
                                                        - input (full): 0 (cnt=1, max=0)
-                                                       - max const: 0
-
                                                        - output (half): 4-11 (cnt=8, max=11)  (estimated)
                                                        - output (full): 2-5 (cnt=4, max=5)  (estimated)
-                                                       - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 3 last-baryf, 0 half, 2 full
+
+                                                       - shaderdb: 9 instr, 4 nops, 5 non-nops, 0 mov, 0 cov
+                                                       - shaderdb: 3 last-baryf, 0 half, 2 full, 0 constlen
                                                        - shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-                                                       - shaderdb: 0 (ss), 0 (sy)
+                                                       - shaderdb: 0 sstall, 0 (ss), 0 (sy)
 000000000105422c:                                      0000: 40a98302 01054080 00000000
 t7                                     opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords)
                                                { DST_OFF = 0 | STATE_TYPE = ST6_SHADER | STATE_SRC = SS6_INDIRECT | STATE_BLOCK = SB6_FS_SHADER | NUM_UNIT = 1 }
@@ -934,13 +934,13 @@ t7                                        opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords)
                                                - used (full): 0 2-5 (cnt=5, max=5)
                                                - input (half): 0-1 (cnt=2, max=1)
                                                - input (full): 0 (cnt=1, max=0)
-                                               - max const: 0
-
                                                - output (half): 4-11 (cnt=8, max=11)  (estimated)
                                                - output (full): 2-5 (cnt=4, max=5)  (estimated)
-                                               - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 3 last-baryf, 0 half, 2 full
+
+                                               - shaderdb: 9 instr, 4 nops, 5 non-nops, 0 mov, 0 cov
+                                               - shaderdb: 3 last-baryf, 0 half, 2 full, 0 constlen
                                                - shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-                                               - shaderdb: 0 (ss), 0 (sy)
+                                               - shaderdb: 0 sstall, 0 (ss), 0 (sy)
 0000000001054238:                                      0000: 70348003 00720000 01054080 00000000
 t4                                     write SP_CS_CONFIG (a9bb)
                                                SP_CS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
@@ -1502,13 +1502,13 @@ t7                      opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords)
                                - used (full): 4-11 (cnt=8, max=11)
                                - input (half): 8-19 (cnt=12, max=19)
                                - input (full): 4-9 (cnt=6, max=9)
-                               - max const: 5
-
                                - output (half): 16-23 (cnt=8, max=23)  (estimated)
                                - output (full): 8-11 (cnt=4, max=11)  (estimated)
-                               - shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 last-baryf, 0 half, 3 full
+
+                               - shaderdb: 13 instr, 7 nops, 6 non-nops, 0 mov, 0 cov
+                               - shaderdb: 0 last-baryf, 0 half, 3 full, 2 constlen
                                - shaderdb: 8 cat0, 0 cat1, 1 cat2, 4 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-                               - shaderdb: 0 (ss), 0 (sy)
+                               - shaderdb: 0 sstall, 0 (ss), 0 (sy)
 !+     00000100                        SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
 !+     00000001                        SP_VS_INSTRLEN: 1
  +     00000000                        SP_HS_WAVE_INPUT_SIZE: 0
@@ -1537,13 +1537,13 @@ t7                      opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords)
                                - used (full): 0 2-5 (cnt=5, max=5)
                                - input (half): 0-1 (cnt=2, max=1)
                                - input (full): 0 (cnt=1, max=0)
-                               - max const: 0
-
                                - output (half): 4-11 (cnt=8, max=11)  (estimated)
                                - output (full): 2-5 (cnt=4, max=5)  (estimated)
-                               - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 3 last-baryf, 0 half, 2 full
+
+                               - shaderdb: 9 instr, 4 nops, 5 non-nops, 0 mov, 0 cov
+                               - shaderdb: 3 last-baryf, 0 half, 2 full, 0 constlen
                                - shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-                               - shaderdb: 0 (ss), 0 (sy)
+                               - shaderdb: 0 sstall, 0 (ss), 0 (sy)
 !+     00000100                        SP_BLEND_CNTL: { UNK8 }
  +     00000000                        SP_SRGB_CNTL: { 0 }
 !+     0000000f                        SP_FS_RENDER_COMPONENTS: { RT0 = 0xf | RT1 = 0 | RT2 = 0 | RT3 = 0 | RT4 = 0 | RT5 = 0 | RT6 = 0 | RT7 = 0 }
index 874f108..a3cfde1 100644 (file)
@@ -639,13 +639,13 @@ t4                                        write SP_VS_OBJ_START_LO (a81c)
                                                        - used (full): (cnt=0, max=0)
                                                        - input (half): (cnt=0, max=0)
                                                        - input (full): (cnt=0, max=0)
-                                                       - max const: 0
-
                                                        - output (half): (cnt=0, max=0)  (estimated)
                                                        - output (full): (cnt=0, max=0)  (estimated)
-                                                       - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full
+
+                                                       - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov
+                                                       - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen
                                                        - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-                                                       - shaderdb: 0 (ss), 0 (sy)
+                                                       - shaderdb: 0 sstall, 0 (ss), 0 (sy)
 0000000001121038:                                      0000: 48a81c02 01011000 00000000
 t7                                     opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
                                                { DST_OFF = 0 | STATE_TYPE = ST6_SHADER | STATE_SRC = SS6_INDIRECT | STATE_BLOCK = SB6_VS_SHADER | NUM_UNIT = 1 }
@@ -661,13 +661,13 @@ t7                                        opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
                                                - used (full): (cnt=0, max=0)
                                                - input (half): (cnt=0, max=0)
                                                - input (full): (cnt=0, max=0)
-                                               - max const: 0
-
                                                - output (half): (cnt=0, max=0)  (estimated)
                                                - output (full): (cnt=0, max=0)  (estimated)
-                                               - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full
+
+                                               - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov
+                                               - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen
                                                - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-                                               - shaderdb: 0 (ss), 0 (sy)
+                                               - shaderdb: 0 sstall, 0 (ss), 0 (sy)
 0000000001121044:                                      0000: 70328003 00620000 01011000 00000000
 t4                                     write VPC_VAR[0].DISABLE (9212)
                                                VPC_VAR[0].DISABLE: 0xffffffff
@@ -1109,13 +1109,13 @@ t7                      opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
                                - used (full): (cnt=0, max=0)
                                - input (half): (cnt=0, max=0)
                                - input (full): (cnt=0, max=0)
-                               - max const: 0
-
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): (cnt=0, max=0)  (estimated)
-                               - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full
+
+                               - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov
+                               - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen
                                - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-                               - shaderdb: 0 (ss), 0 (sy)
+                               - shaderdb: 0 sstall, 0 (ss), 0 (sy)
 !+     00000100                        SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
 !+     00000001                        SP_VS_INSTRLEN: 1
  +     00000000                        SP_HS_WAVE_INPUT_SIZE: 0
@@ -1954,13 +1954,13 @@ t4                                      write SP_VS_OBJ_START_LO (a81c)
                                                        - used (full): (cnt=0, max=0)
                                                        - input (half): (cnt=0, max=0)
                                                        - input (full): (cnt=0, max=0)
-                                                       - max const: 0
-
                                                        - output (half): (cnt=0, max=0)  (estimated)
                                                        - output (full): (cnt=0, max=0)  (estimated)
-                                                       - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full
+
+                                                       - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov
+                                                       - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen
                                                        - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-                                                       - shaderdb: 0 (ss), 0 (sy)
+                                                       - shaderdb: 0 sstall, 0 (ss), 0 (sy)
 0000000001120038:                                      0000: 48a81c02 01012000 00000000
 t7                                     opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
                                                { DST_OFF = 0 | STATE_TYPE = ST6_SHADER | STATE_SRC = SS6_INDIRECT | STATE_BLOCK = SB6_VS_SHADER | NUM_UNIT = 1 }
@@ -1976,13 +1976,13 @@ t7                                      opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
                                                - used (full): (cnt=0, max=0)
                                                - input (half): (cnt=0, max=0)
                                                - input (full): (cnt=0, max=0)
-                                               - max const: 0
-
                                                - output (half): (cnt=0, max=0)  (estimated)
                                                - output (full): (cnt=0, max=0)  (estimated)
-                                               - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full
+
+                                               - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov
+                                               - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen
                                                - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-                                               - shaderdb: 0 (ss), 0 (sy)
+                                               - shaderdb: 0 sstall, 0 (ss), 0 (sy)
 0000000001120044:                                      0000: 70328003 00620000 01012000 00000000
 t4                                     write VPC_VAR[0].DISABLE (9212)
                                                VPC_VAR[0].DISABLE: 0xffffffff
@@ -3497,13 +3497,13 @@ t4                                      write SP_FS_OBJ_START_LO (a983)
                                                        - used (full): 0-73 (cnt=74, max=73)
                                                        - input (half): 38-41 (cnt=4, max=41)
                                                        - input (full): 19-20 (cnt=2, max=20)
-                                                       - max const: 113
-
                                                        - output (half): 8-15 (cnt=8, max=15)  (estimated)
                                                        - output (full): 4-7 (cnt=4, max=7)  (estimated)
-                                                       - shaderdb: 2414 instructions, 1114 nops, 1300 non-nops, (1406 instlen), 0 last-baryf, 0 half, 19 full
+
+                                                       - shaderdb: 2414 instr, 1114 nops, 1300 non-nops, 46 mov, 2 cov
+                                                       - shaderdb: 0 last-baryf, 0 half, 19 full, 29 constlen
                                                        - shaderdb: 1120 cat0, 48 cat1, 551 cat2, 512 cat3, 183 cat4, 0 cat5, 0 cat6, 0 cat7
-                                                       - shaderdb: 140 (ss), 0 (sy)
+                                                       - shaderdb: 1326 sstall, 140 (ss), 0 (sy)
 0000000001120158:                                      0000: 40a98302 01013000 00000000
 t7                                     opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords)
                                                { DST_OFF = 0 | STATE_TYPE = ST6_SHADER | STATE_SRC = SS6_INDIRECT | STATE_BLOCK = SB6_FS_SHADER | NUM_UNIT = 88 }
@@ -4920,13 +4920,13 @@ t7                                      opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords)
                                                - used (full): 0-73 (cnt=74, max=73)
                                                - input (half): 38-41 (cnt=4, max=41)
                                                - input (full): 19-20 (cnt=2, max=20)
-                                               - max const: 113
-
                                                - output (half): 8-15 (cnt=8, max=15)  (estimated)
                                                - output (full): 4-7 (cnt=4, max=7)  (estimated)
-                                               - shaderdb: 2414 instructions, 1114 nops, 1300 non-nops, (1406 instlen), 0 last-baryf, 0 half, 19 full
+
+                                               - shaderdb: 2414 instr, 1114 nops, 1300 non-nops, 46 mov, 2 cov
+                                               - shaderdb: 0 last-baryf, 0 half, 19 full, 29 constlen
                                                - shaderdb: 1120 cat0, 48 cat1, 551 cat2, 512 cat3, 183 cat4, 0 cat5, 0 cat6, 0 cat7
-                                               - shaderdb: 140 (ss), 0 (sy)
+                                               - shaderdb: 1326 sstall, 140 (ss), 0 (sy)
 0000000001120164:                                      0000: 70348003 16320000 01013000 00000000
 t4                                     write VFD_CONTROL_1 (a001)
                                                VFD_CONTROL_1: { REGID4VTX = r63.x | REGID4INST = r63.x | REGID4PRIMID = r63.x | REGID4VIEWID = r63.x }
@@ -5334,13 +5334,13 @@ t7                      opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
                                - used (full): (cnt=0, max=0)
                                - input (half): (cnt=0, max=0)
                                - input (full): (cnt=0, max=0)
-                               - max const: 0
-
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): (cnt=0, max=0)  (estimated)
-                               - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full
+
+                               - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov
+                               - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen
                                - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-                               - shaderdb: 0 (ss), 0 (sy)
+                               - shaderdb: 0 sstall, 0 (ss), 0 (sy)
  +     00000100                        SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
  +     00000001                        SP_VS_INSTRLEN: 1
  +     00000000                        SP_HS_WAVE_INPUT_SIZE: 0
@@ -6772,13 +6772,13 @@ t7                      opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
                                - used (full): 0-73 (cnt=74, max=73)
                                - input (half): 38-41 (cnt=4, max=41)
                                - input (full): 19-20 (cnt=2, max=20)
-                               - max const: 113
-
                                - output (half): 8-15 (cnt=8, max=15)  (estimated)
                                - output (full): 4-7 (cnt=4, max=7)  (estimated)
-                               - shaderdb: 2414 instructions, 1114 nops, 1300 non-nops, (1406 instlen), 0 last-baryf, 0 half, 19 full
+
+                               - shaderdb: 2414 instr, 1114 nops, 1300 non-nops, 46 mov, 2 cov
+                               - shaderdb: 0 last-baryf, 0 half, 19 full, 29 constlen
                                - shaderdb: 1120 cat0, 48 cat1, 551 cat2, 512 cat3, 183 cat4, 0 cat5, 0 cat6, 0 cat7
-                               - shaderdb: 140 (ss), 0 (sy)
+                               - shaderdb: 1326 sstall, 140 (ss), 0 (sy)
 !+     00000100                        SP_BLEND_CNTL: { UNK8 }
  +     fcfcfc00                        SP_FS_OUTPUT_CNTL0: { DEPTH_REGID = r63.x | SAMPMASK_REGID = r63.x | STENCILREF_REGID = r63.x }
 !+     00000001                        SP_FS_OUTPUT_CNTL1: { MRT = 1 }
index efed6fc..c5ef966 100644 (file)
@@ -429,13 +429,13 @@ t3                        opcode: CP_LOAD_STATE4 (30) (35 dwords)
                                - used (full): (cnt=0, max=0)
                                - input (half): (cnt=0, max=0)
                                - input (full): (cnt=0, max=0)
-                               - max const: 0
-
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): (cnt=0, max=0)  (estimated)
-                               - shaderdb: 5 instructions, 4 nops, 1 non-nops, (5 instlen), 0 last-baryf, 0 half, 0 full
+
+                               - shaderdb: 5 instr, 4 nops, 1 non-nops, 0 mov, 0 cov
+                               - shaderdb: 0 last-baryf, 0 half, 0 full, 0 constlen
                                - shaderdb: 5 cat0, 0 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-                               - shaderdb: 0 (ss), 0 (sy)
+                               - shaderdb: 0 sstall, 0 (ss), 0 (sy)
 109ce1f0:                      0000: c0213000 00600000 00000000 00000000 03000000 00000000 00000000 00000000
 *
 t3                     opcode: CP_LOAD_STATE4 (30) (35 dwords)
@@ -455,13 +455,13 @@ t3                        opcode: CP_LOAD_STATE4 (30) (35 dwords)
                                - used (full): 0-3 (cnt=4, max=3)
                                - input (half): (cnt=0, max=0)
                                - input (full): (cnt=0, max=0)
-                               - max const: 3
-
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 0-3 (cnt=4, max=3)  (estimated)
-                               - shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 last-baryf, 0 half, 1 full
+
+                               - shaderdb: 9 instr, 4 nops, 5 non-nops, 4 mov, 0 cov
+                               - shaderdb: 0 last-baryf, 0 half, 1 full, 1 constlen
                                - shaderdb: 5 cat0, 4 cat1, 0 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
-                               - shaderdb: 0 (ss), 0 (sy)
+                               - shaderdb: 0 sstall, 0 (ss), 0 (sy)
 109ce27c:                      0000: c0213000 00700000 00000000 00000000 20244000 00000001 20244001 00000002
 109ce29c:                      0020: 20244002 00000003 20244003 00000000 03000000 00000000 00000000 00000000
 *
@@ -1043,13 +1043,13 @@ t3                      opcode: CP_LOAD_STATE4 (30) (131 dwords)
                                - used (full): 0-13 (cnt=14, max=13)
                                - input (half): (cnt=0, max=0)
                                - input (full): 2-5 (cnt=4, max=5)
-                               - max const: 52
-
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 6-13 (cnt=8, max=13)  (estimated)
-                               - shaderdb: 74 instructions, 27 nops, 47 non-nops, (61 instlen), 0 last-baryf, 0 half, 4 full
+
+                               - shaderdb: 74 instr, 27 nops, 47 non-nops, 7 mov, 1 cov
+                               - shaderdb: 0 last-baryf, 0 half, 4 full, 14 constlen
                                - shaderdb: 28 cat0, 8 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7
-                               - shaderdb: 1 (ss), 0 (sy)
+                               - shaderdb: 10 sstall, 1 (ss), 0 (sy)
 109ce66c:                      0000: c0813000 01200000 00000000 10000002 40700000 10030002 40700001 00001004
 109ce68c:                      0020: 63818000 00011007 63818001 00001008 63820000 0001100b 63820001 0000100c
 109ce6ac:                      0040: 63828006 10010002 40700000 0001100f 63828009 00001005 63818000 00000010
@@ -1085,13 +1085,13 @@ t3                      opcode: CP_LOAD_STATE4 (30) (35 dwords)
                                - used (full): 0-3 (cnt=4, max=3)
                                - input (half): (cnt=0, max=0)
                                - input (full): 0-3 (cnt=4, max=3)
-                               - max const: 0
-
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): (cnt=0, max=0)  (estimated)
-                               - shaderdb: 11 instructions, 5 nops, 6 non-nops, (11 instlen), 5 last-baryf, 0 half, 1 full
+
+                               - shaderdb: 11 instr, 5 nops, 6 non-nops, 0 mov, 0 cov
+                               - shaderdb: 5 last-baryf, 0 half, 1 full, 0 constlen
                                - shaderdb: 6 cat0, 0 cat1, 1 cat2, 0 cat3, 0 cat4, 0 cat5, 4 cat6, 0 cat7
-                               - shaderdb: 1 (ss), 0 (sy)
+                               - shaderdb: 65531 sstall, 1 (ss), 0 (sy)
 109ce878:                      0000: c0213000 00700000 00000000 00000000 00000000 01c00000 c7c60000 01c00002
 109ce898:                      0020: c7c60001 01c00004 c7c60002 01c00006 c7c60003 00002000 473090fc 00000000
 109ce8b8:                      0040: 03000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
@@ -1675,13 +1675,13 @@ t3                      opcode: CP_LOAD_STATE4 (30) (131 dwords)
                                - used (full): 0-8 10-17 (cnt=17, max=17)
                                - input (half): (cnt=0, max=0)
                                - input (full): 2-8 (cnt=7, max=8)
-                               - max const: 52
-
                                - output (half): (cnt=0, max=0)  (estimated)
                                - output (full): 10-17 (cnt=8, max=17)  (estimated)
-                               - shaderdb: 67 instructions, 23 nops, 44 non-nops, (56 instlen), 0 last-baryf, 0 half, 5 full
+
+                               - shaderdb: 67 instr, 23 nops, 44 non-nops, 4 mov, 1 cov
+                               - shaderdb: 0 last-baryf, 0 half, 5 full, 14 constlen
                                - shaderdb: 24 cat0, 5 cat1, 15 cat2, 22 cat3, 1 cat4, 0 cat5, 0 cat6, 0 cat7
-                               - shaderdb: 1 (ss), 0 (sy)
+                               - shaderdb: 10 sstall, 1 (ss), 0 (sy)
 109cee34:                      0000: c0813000 01200000 00000000 10000002 40700000 10030002 40700001 00001004
 109cee54:                      0020: 63818000 00011007 63818001 00001008 63820000 0001100b 63820001 0000100c
 109cee74:                      0040: 6382800a 10010002 40700000 0001100f 6382800d 00001005 63818000 00000010
index 0188da9..61d3c7b 100644 (file)
@@ -1002,7 +1002,7 @@ ir3_collect_info(struct ir3_shader_variant *v)
        info->sizedwords = info->size / 4;
 
        foreach_block (block, &shader->block_list) {
-               unsigned sfu_delay = 0;
+               int sfu_delay = 0;
 
                foreach_instr (instr, &block->instr_list) {
 
@@ -1050,6 +1050,7 @@ ir3_collect_info(struct ir3_shader_variant *v)
                        if (instr->flags & IR3_INSTR_SS) {
                                info->ss++;
                                info->sstall += sfu_delay;
+                               sfu_delay = 0;
                        }
 
                        if (instr->flags & IR3_INSTR_SY)
@@ -1057,8 +1058,9 @@ ir3_collect_info(struct ir3_shader_variant *v)
 
                        if (is_sfu(instr)) {
                                sfu_delay = 10;
-                       } else if (sfu_delay > 0) {
-                               sfu_delay--;
+                       } else {
+                               int n = MIN2(sfu_delay, 1 + instr->repeat + instr->nop);
+                               sfu_delay -= n;
                        }
                }
        }