freedreno/a6xx: Cleanup SP_XS_CTRL_REG0 definitions
author Connor Abbott <cwabbott0@gmail.com>
Tue, 9 Mar 2021 14:53:06 +0000 (15:53 +0100)
committer Marge Bot <eric+marge@anholt.net>
Thu, 11 Mar 2021 20:58:39 +0000 (20:58 +0000)
The registers were actually different per-stage even though we used the
same type, which resulted in a bunch of incorrectly programmed fields
and confusion. Move the stage-specific values to the registers
themselves, which makes things much less confusing and makes it possible
to set "mergedregs" correctly.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9493>

src/freedreno/.gitlab-ci/reference/crash.log
src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log
src/freedreno/.gitlab-ci/reference/fd-clouds.log
src/freedreno/computerator/a6xx.c
src/freedreno/registers/adreno/a6xx.xml
src/freedreno/vulkan/tu_pipeline.c
src/gallium/drivers/freedreno/a6xx/fd6_compute.c
src/gallium/drivers/freedreno/a6xx/fd6_program.c

index 8389724..a77a40d 100644 (file)
@@ -6911,7 +6911,7 @@ clusters:
        00000000        HLSQ_LOAD_STATE_GEOM_EXT_SRC_ADDR_HI: 0
   - cluster-name: CLUSTER_SP_VS
     - context: 0
-       00000000        SP_VS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 }
+       00000000        SP_VS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
        00000000        SP_VS_BRANCH_COND: 0
        00000000        SP_VS_PRIMITIVE_CNTL: { OUT = 0 | FLAGS_REGID = r0.x }
        00000000        SP_VS_OUT[0].REG: { A_REGID = r0.x | A_COMPMASK = 0 | B_REGID = r0.x | B_COMPMASK = 0 }
@@ -6948,7 +6948,7 @@ clusters:
        00000080        SP_VS_TEX_COUNT: 128
        00000100        SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
        00000000        SP_VS_INSTRLEN: 0
-       00000000        SP_HS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 }
+       00000000        SP_HS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
        00000000        SP_HS_WAVE_INPUT_SIZE: 0
        00000000        SP_HS_BRANCH_COND: 0
        00000000        SP_HS_OBJ_FIRST_EXEC_OFFSET: 0
@@ -6961,7 +6961,7 @@ clusters:
        00000080        SP_HS_TEX_COUNT: 128
        00000000        SP_HS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
        00000000        SP_HS_INSTRLEN: 0
-       00000000        SP_DS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 }
+       00000000        SP_DS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
        00000000        SP_DS_BRANCH_COND: 0
        00000000        SP_DS_PRIMITIVE_CNTL: { OUT = 0 | FLAGS_REGID = r0.x }
        00000000        SP_DS_OUT[0].REG: { A_REGID = r0.x | A_COMPMASK = 0 | B_REGID = r0.x | B_COMPMASK = 0 }
@@ -6998,7 +6998,7 @@ clusters:
        00000080        SP_DS_TEX_COUNT: 128
        00000000        SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
        00000000        SP_DS_INSTRLEN: 0
-       00000000        SP_GS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 }
+       00000000        SP_GS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
        00000000        SP_GS_PRIM_SIZE: 0
        00000000        SP_GS_BRANCH_COND: 0
        00000000        SP_GS_PRIMITIVE_CNTL: { OUT = 0 | FLAGS_REGID = r0.x }
@@ -7057,7 +7057,7 @@ clusters:
        00000000        0xa8c2: 00000000
        00000000        0xa8c3: 00000000
     - context: 1
-       00000000        SP_VS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 }
+       00000000        SP_VS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
        00000000        SP_VS_BRANCH_COND: 0
        00000000        SP_VS_PRIMITIVE_CNTL: { OUT = 0 | FLAGS_REGID = r0.x }
        00000000        SP_VS_OUT[0].REG: { A_REGID = r0.x | A_COMPMASK = 0 | B_REGID = r0.x | B_COMPMASK = 0 }
@@ -7094,7 +7094,7 @@ clusters:
        00000080        SP_VS_TEX_COUNT: 128
        00000100        SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
        00000000        SP_VS_INSTRLEN: 0
-       00000000        SP_HS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 }
+       00000000        SP_HS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
        00000000        SP_HS_WAVE_INPUT_SIZE: 0
        00000000        SP_HS_BRANCH_COND: 0
        00000000        SP_HS_OBJ_FIRST_EXEC_OFFSET: 0
@@ -7107,7 +7107,7 @@ clusters:
        00000080        SP_HS_TEX_COUNT: 128
        00000000        SP_HS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
        00000000        SP_HS_INSTRLEN: 0
-       00000000        SP_DS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 }
+       00000000        SP_DS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
        00000000        SP_DS_BRANCH_COND: 0
        00000000        SP_DS_PRIMITIVE_CNTL: { OUT = 0 | FLAGS_REGID = r0.x }
        00000000        SP_DS_OUT[0].REG: { A_REGID = r0.x | A_COMPMASK = 0 | B_REGID = r0.x | B_COMPMASK = 0 }
@@ -7144,7 +7144,7 @@ clusters:
        00000080        SP_DS_TEX_COUNT: 128
        00000000        SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
        00000000        SP_DS_INSTRLEN: 0
-       00000000        SP_GS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 }
+       00000000        SP_GS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
        00000000        SP_GS_PRIM_SIZE: 0
        00000000        SP_GS_BRANCH_COND: 0
        00000000        SP_GS_PRIMITIVE_CNTL: { OUT = 0 | FLAGS_REGID = r0.x }
@@ -7370,7 +7370,7 @@ clusters:
        deadbeef        HLSQ_2D_EVENT_CMD: { STATE_ID = 0xbe | EVENT = 0x6f | 0xdead0080 }
   - cluster-name: CLUSTER_SP_PS
     - context: 0
-       05100000        SP_FS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | UNK24 | PIXLODENABLE }
+       05100000        SP_FS_CTRL_REG0: { THREADSIZE = THREAD128 | UNK24 | PIXLODENABLE | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
        00000000        SP_FS_BRANCH_COND: 0
        00000000        SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
        4bdb43d8        SP_FS_OBJ_START: 0x4bdb43d8
@@ -7411,7 +7411,7 @@ clusters:
        00000000        SP_FS_BINDLESS_PREFETCH[0x3].CMD: { SAMP_ID = 0 | TEX_ID = 0 }
        00000080        SP_FS_TEX_COUNT: 128
        0000f000        SP_UNKNOWN_A9A8: 0xf000
-       00421800        SP_CS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 | THREADSIZE = THREAD64 | VARYING }
+       00421800        SP_CS_CTRL_REG0: { THREADSIZE = THREAD64 | UNK22 | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 }
        0000001f        SP_CS_UNKNOWN_A9B1: { SHARED_SIZE = 31 }
        00000000        SP_CS_BRANCH_COND: 0
        00000000        SP_CS_OBJ_FIRST_EXEC_OFFSET: 0
@@ -7452,7 +7452,7 @@ clusters:
        00000000        0xaa30: 00000000
        00000000        0xaa31: 00000000
     - context: 1
-       05100000        SP_FS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | UNK24 | PIXLODENABLE }
+       05100000        SP_FS_CTRL_REG0: { THREADSIZE = THREAD128 | UNK24 | PIXLODENABLE | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
        00000000        SP_FS_BRANCH_COND: 0
        00000000        SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
        4bdb43d8        SP_FS_OBJ_START: 0x4bdb43d8
@@ -7493,7 +7493,7 @@ clusters:
        00000000        SP_FS_BINDLESS_PREFETCH[0x3].CMD: { SAMP_ID = 0 | TEX_ID = 0 }
        00000080        SP_FS_TEX_COUNT: 128
        0000f000        SP_UNKNOWN_A9A8: 0xf000
-       00421800        SP_CS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 | THREADSIZE = THREAD64 | VARYING }
+       00421800        SP_CS_CTRL_REG0: { THREADSIZE = THREAD64 | UNK22 | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 }
        0000001f        SP_CS_UNKNOWN_A9B1: { SHARED_SIZE = 31 }
        00000000        SP_CS_BRANCH_COND: 0
        00000000        SP_CS_OBJ_FIRST_EXEC_OFFSET: 0
index 00938a1..f4d98cb 100644 (file)
@@ -195,10 +195,10 @@ t7                opcode: CP_SET_DRAW_STATE (43) (4 dwords)
                        { ADDR_HI = 0 }
 00000000010581ec:              0000: 70438003 00040000 00000000 00000000
 t4             write SP_HS_CTRL_REG0 (a830)
-                       SP_HS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 }
+                       SP_HS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
 00000000010581fc:              0000: 40a83001 00000000
 t4             write SP_GS_CTRL_REG0 (a870)
-                       SP_GS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 }
+                       SP_GS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
 0000000001058204:              0000: 48a87001 00000000
 t4             write GRAS_LRZ_CNTL (8100)
                        GRAS_LRZ_CNTL: { 0 }
@@ -341,8 +341,8 @@ t7          opcode: CP_BLIT (2c) (2 dwords)
  +     00000000                VFD_MULTIVIEW_CNTL: { VIEWS = 0 }
 !+     00000001                VFD_ADD_OFFSET: { VERTEX }
  +     00000000                SP_VS_OBJ_FIRST_EXEC_OFFSET: 0
- +     00000000                SP_HS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 }
- +     00000000                SP_GS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 }
+ +     00000000                SP_HS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
+ +     00000000                SP_GS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 }
  +     00000000                SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
  +     00000000                SP_UNKNOWN_A9A8: 0
 !+     00000005                SP_MODE_CONTROL: { CONSTANT_DEMOTION_ENABLE | UNK2 }
@@ -783,7 +783,7 @@ t4                                  write HLSQ_INVALIDATE_CMD (bb08)
                                                HLSQ_INVALIDATE_CMD: { VS_STATE | HS_STATE | DS_STATE | GS_STATE | FS_STATE | GFX_IBO | CS_BINDLESS = 0 | GFX_BINDLESS = 0 }
 0000000001054180:                                      0000: 40bb0801 0000009f
 t4                                     write SP_VS_CTRL_REG0 (a800)
-                                               SP_VS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 3 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | MERGEDREGS }
+                                               SP_VS_CTRL_REG0: { MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 3 | BRANCHSTACK = 0 | 0x80000000 }
 0000000001054188:                                      0000: 40a80001 80100180
 t4                                     write SP_VS_CONFIG (a823)
                                                SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
@@ -863,7 +863,7 @@ t4                                  write HLSQ_GS_CNTL (b803)
                                                HLSQ_GS_CNTL: { CONSTLEN = 0 }
 0000000001054208:                                      0000: 48b80301 00000000
 t4                                     write SP_FS_CTRL_REG0 (a980)
-                                               SP_FS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 2 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | VARYING | UNK24 | MERGEDREGS }
+                                               SP_FS_CTRL_REG0: { THREADSIZE = THREAD128 | VARYING | UNK24 | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 2 | BRANCHSTACK = 0 }
 0000000001054210:                                      0000: 40a98001 81500100
 t4                                     write SP_FS_CONFIG (ab04)
                                                SP_FS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
@@ -1447,7 +1447,7 @@ t7                        opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords)
 !+     0000000f                        VFD_DEST_CNTL[0].INSTR: { WRITEMASK = 0xf | REGID = r0.x }
 !+     0000004f                        VFD_DEST_CNTL[0x1].INSTR: { WRITEMASK = 0xf | REGID = r1.x }
 !+     00000081                        VFD_DEST_CNTL[0x2].INSTR: { WRITEMASK = 0x1 | REGID = r2.x }
-!+     80100180                        SP_VS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 3 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | MERGEDREGS }
+!+     80100180                        SP_VS_CTRL_REG0: { MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 3 | BRANCHSTACK = 0 | 0x80000000 }
 !+     00000002                        SP_VS_PRIMITIVE_CNTL: { OUT = 2 | FLAGS_REGID = r0.x }
 !+     0f000f08                        SP_VS_OUT[0].REG: { A_REGID = r2.x | A_COMPMASK = 0xf | B_REGID = r0.x | B_COMPMASK = 0xf }
 !+     00000400                        SP_VS_VPC_DST[0].REG: { OUTLOC0 = 0 | OUTLOC1 = 4 | OUTLOC2 = 0 | OUTLOC3 = 0 }
@@ -1480,7 +1480,7 @@ t7                        opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords)
  +     00000000                        SP_HS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
  +     00000000                        SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
  +     00000000                        SP_GS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
-!+     81500100                        SP_FS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 2 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | VARYING | UNK24 | MERGEDREGS }
+!+     81500100                        SP_FS_CTRL_REG0: { THREADSIZE = THREAD128 | VARYING | UNK24 | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 2 | BRANCHSTACK = 0 }
 !+     01054080                        SP_FS_OBJ_START: 0x1054080              base=1054000, offset=128, size=12288
 0000000001054080:                              0000: 00002000 47300002 00002001 47300003 00002002 47300004 00002003 47308005
 00000000010540a0:                              0020: 00000000 03000000 00000000 00000000 00000000 00000000 00000000 00000000
index 046a4e8..07bb702 100644 (file)
@@ -619,7 +619,7 @@ t4                                  write SP_FS_OUTPUT_CNTL0 (a98c)
                                                SP_FS_OUTPUT_CNTL0: { DEPTH_REGID = r63.x | SAMPMASK_REGID = r63.x | STENCILREF_REGID = r63.x }
 0000000001121020:                                      0000: 40a98c01 fcfcfc00
 t4                                     write SP_VS_CTRL_REG0 (a800)
-                                               SP_VS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | MERGEDREGS }
+                                               SP_VS_CTRL_REG0: { MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | 0x80000000 }
 0000000001121028:                                      0000: 40a80001 80100080
 t4                                     write SP_VS_INSTRLEN (a824)
                                                SP_VS_INSTRLEN: 1
@@ -693,7 +693,7 @@ t4                                  write HLSQ_FS_CNTL_0 (b980)
                                                HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 }
 00000000011210b8:                                      0000: 48b98001 00000001
 t4                                     write SP_FS_CTRL_REG0 (a980)
-                                               SP_FS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | UNK24 | MERGEDREGS }
+                                               SP_FS_CTRL_REG0: { THREADSIZE = THREAD128 | UNK24 | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 }
 00000000011210c0:                                      0000: 40a98001 81100080
 t4                                     write SP_FS_OBJ_FIRST_EXEC_OFFSET (a982)
                                                SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
@@ -1076,7 +1076,7 @@ t7                        opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
 !+     00000001                        VFD_DECODE[0].STEP_RATE: 1
 !+     0000000f                        VFD_DEST_CNTL[0].INSTR: { WRITEMASK = 0xf | REGID = r0.x }
 !+     00000001                        SP_UNKNOWN_A0F8: 0x1
-!+     80100080                        SP_VS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | MERGEDREGS }
+!+     80100080                        SP_VS_CTRL_REG0: { MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | 0x80000000 }
 !+     00000001                        SP_VS_PRIMITIVE_CNTL: { OUT = 1 | FLAGS_REGID = r0.x }
 !+     00000f00                        SP_VS_OUT[0].REG: { A_REGID = r0.x | A_COMPMASK = 0xf | B_REGID = r0.x | B_COMPMASK = 0 }
  +     00000000                        SP_VS_VPC_DST[0].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
@@ -1103,7 +1103,7 @@ t7                        opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
  +     00000000                        SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
  +     00000000                        SP_GS_PRIM_SIZE: 0
  +     00000000                        SP_GS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
-!+     81100080                        SP_FS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | UNK24 | MERGEDREGS }
+!+     81100080                        SP_FS_CTRL_REG0: { THREADSIZE = THREAD128 | UNK24 | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 }
  +     00000000                        SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
  +     00000000                        SP_SRGB_CNTL: { 0 }
 !+     0000000f                        SP_FS_RENDER_COMPONENTS: { RT0 = 0xf | RT1 = 0 | RT2 = 0 | RT3 = 0 | RT4 = 0 | RT5 = 0 | RT6 = 0 | RT7 = 0 }
@@ -1913,7 +1913,7 @@ t4                                        write SP_FS_OUTPUT_CNTL0 (a98c)
                                                SP_FS_OUTPUT_CNTL0: { DEPTH_REGID = r63.x | SAMPMASK_REGID = r63.x | STENCILREF_REGID = r63.x }
 0000000001120020:                                      0000: 40a98c01 fcfcfc00
 t4                                     write SP_VS_CTRL_REG0 (a800)
-                                               SP_VS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | MERGEDREGS }
+                                               SP_VS_CTRL_REG0: { MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | 0x80000000 }
 0000000001120028:                                      0000: 40a80001 80100080
 t4                                     write SP_VS_INSTRLEN (a824)
                                                SP_VS_INSTRLEN: 1
@@ -1987,7 +1987,7 @@ t4                                        write HLSQ_FS_CNTL_0 (b980)
                                                HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 }
 00000000011200b8:                                      0000: 48b98001 00000001
 t4                                     write SP_FS_CTRL_REG0 (a980)
-                                               SP_FS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 19 | BRANCHSTACK = 2 | THREADSIZE = THREAD128 | VARYING | UNK24 | MERGEDREGS }
+                                               SP_FS_CTRL_REG0: { THREADSIZE = THREAD128 | VARYING | UNK24 | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 19 | BRANCHSTACK = 2 }
 00000000011200c0:                                      0000: 40a98001 81508980
 t4                                     write SP_FS_OBJ_FIRST_EXEC_OFFSET (a982)
                                                SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
@@ -5275,7 +5275,7 @@ t7                        opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
  +     c7400000                        VFD_DECODE[0].INSTR: { IDX = 0 | OFFSET = 0 | FORMAT = FMT6_32_32_32_FLOAT | SWAP = WZYX | UNK30 | FLOAT }
  +     00000001                        VFD_DECODE[0].STEP_RATE: 1
  +     0000000f                        VFD_DEST_CNTL[0].INSTR: { WRITEMASK = 0xf | REGID = r0.x }
- +     80100080                        SP_VS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | MERGEDREGS }
+ +     80100080                        SP_VS_CTRL_REG0: { MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | 0x80000000 }
  +     00000001                        SP_VS_PRIMITIVE_CNTL: { OUT = 1 | FLAGS_REGID = r0.x }
  +     00000f00                        SP_VS_OUT[0].REG: { A_REGID = r0.x | A_COMPMASK = 0xf | B_REGID = r0.x | B_COMPMASK = 0 }
  +     00000000                        SP_VS_VPC_DST[0].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
@@ -5301,7 +5301,7 @@ t7                        opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
  +     00000000                        SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
  +     00000000                        SP_GS_PRIM_SIZE: 0
  +     00000000                        SP_GS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
-!+     81508980                        SP_FS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 19 | BRANCHSTACK = 2 | THREADSIZE = THREAD128 | VARYING | UNK24 | MERGEDREGS }
+!+     81508980                        SP_FS_CTRL_REG0: { THREADSIZE = THREAD128 | VARYING | UNK24 | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 19 | BRANCHSTACK = 2 }
  +     00000000                        SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
 !+     01013000                        SP_FS_OBJ_START: 0x1013000              base=1013000, offset=0, size=11264
 0000000001013000:                              0000: 40400000 204cc000 00000000 204cc006 3e99999a 204cc004 20080014 42700008
index 51bc423..7b95b54 100644 (file)
@@ -150,8 +150,7 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel)
                A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
                A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
                COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) |
-               A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack) |
-               COND(v->need_pixlod, A6XX_SP_CS_CTRL_REG0_PIXLODENABLE));
+               A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack));
 
        OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
        OUT_RING(ring, 0x41);
index f1bac71..f43da37 100644 (file)
@@ -2674,7 +2674,7 @@ to upconvert to 32b float internally?
 
        <!--
        Note: this seems to always be paired with another bit in another
-       block. So far we've found the FS and CS bits.
+       block.
        -->
        <enum name="a6xx_threadsize">
                <value value="0" name="THREAD64"/>
@@ -2706,26 +2706,6 @@ to upconvert to 32b float internally?
                <bitfield name="UNK13" pos="13" type="boolean"/>
                <!-- seems to be nesting level for flow control:.. -->
                <bitfield name="BRANCHSTACK" low="14" high="19" type="uint"/>
-               <!-- note: THREADSIZE known to work for at least FS/CS -->
-               <bitfield name="THREADSIZE" pos="20" type="a6xx_threadsize"/>
-               <!-- no more fields for HS/DS/GS
-                    VS has bit21, CS has bit21-bit23, FS has all bits except bit29/bit30 -->
-               <!-- VS: ??? (blob has it set)
-                    CS: seems to make SP use less concurrent threads when possible?
-                    FS: ??? -->
-               <bitfield name="UNK21" pos="21" type="boolean"/>
-               <!-- CS: has a small impact on performance, not clear what it does
-                    FS: set to true when varyings are used -->
-               <bitfield name="VARYING" pos="22" type="boolean"/>
-               <!-- CS: separate prologue-only threads?
-                    FS: set when fine derivates are used -->
-               <bitfield name="DIFF_FINE" pos="23" type="boolean"/>
-               <!-- note: vk blob uses bit24 -->
-               <bitfield name="UNK24" pos="24" type="boolean"/>
-               <bitfield name="UNK25" pos="25" type="boolean"/>
-               <bitfield name="PIXLODENABLE" pos="26" type="boolean"/>
-               <bitfield name="UNK27" low="27" high="28"/>
-               <bitfield name="MERGEDREGS" pos="31" type="boolean"/>
        </bitset>
 
        <bitset name="a6xx_sp_xs_config" inline="yes">
@@ -2755,7 +2735,11 @@ to upconvert to 32b float internally?
                <bitfield name="FLAGS_REGID" low="6" high="13" type="a3xx_regid"/>
        </bitset>
 
-       <reg32 offset="0xa800" name="SP_VS_CTRL_REG0" type="a6xx_sp_xs_ctrl_reg0"/>
+       <reg32 offset="0xa800" name="SP_VS_CTRL_REG0" type="a6xx_sp_xs_ctrl_reg0">
+               <bitfield name="MERGEDREGS" pos="20" type="boolean"/>
+               <!-- ??? (blob has it set) -->
+               <bitfield name="UNK21" pos="21" type="boolean"/>
+       </reg32>
        <!-- bitmask of true/false conditions for VS brac.N instructions,
             bit N corresponds to brac.N -->
        <reg32 offset="0xa801" name="SP_VS_BRANCH_COND" type="hex"/>
@@ -2855,7 +2839,13 @@ to upconvert to 32b float internally?
        <reg32 offset="0xa824" name="SP_VS_INSTRLEN" low="0" high="27" type="uint"/>
        <reg32 offset="0xa825" name="SP_VS_PVT_MEM_HW_STACK_OFFSET" low="0" high="18" shr="11"/>
 
-       <reg32 offset="0xa830" name="SP_HS_CTRL_REG0" type="a6xx_sp_xs_ctrl_reg0"/>
+       <reg32 offset="0xa830" name="SP_HS_CTRL_REG0" type="a6xx_sp_xs_ctrl_reg0">
+               <!--
+               There is no mergedregs bit, that comes from the previous stage (VS).
+               No idea what this bit does here.
+               -->
+               <bitfield name="UNK20" pos="20" type="boolean"/>
+       </reg32>
        <!--
        Total size of local storage in dwords divided by the wave size.
        The maximum value is 64. With the wave size being always 64 for HS,
@@ -2876,7 +2866,9 @@ to upconvert to 32b float internally?
        <reg32 offset="0xa83c" name="SP_HS_INSTRLEN" low="0" high="27" type="uint"/>
        <reg32 offset="0xa83d" name="SP_HS_PVT_MEM_HW_STACK_OFFSET" low="0" high="18" shr="11"/>
 
-       <reg32 offset="0xa840" name="SP_DS_CTRL_REG0" type="a6xx_sp_xs_ctrl_reg0"/>
+       <reg32 offset="0xa840" name="SP_DS_CTRL_REG0" type="a6xx_sp_xs_ctrl_reg0">
+               <bitfield name="MERGEDREGS" pos="20" type="boolean"/>
+       </reg32>
        <reg32 offset="0xa841" name="SP_DS_BRANCH_COND" type="hex"/>
 
        <!-- TODO: exact same layout as 0xa802-0xa81a -->
@@ -2909,7 +2901,13 @@ to upconvert to 32b float internally?
        <reg32 offset="0xa864" name="SP_DS_INSTRLEN" low="0" high="27" type="uint"/>
        <reg32 offset="0xa865" name="SP_DS_PVT_MEM_HW_STACK_OFFSET" low="0" high="18" shr="11"/>
 
-       <reg32 offset="0xa870" name="SP_GS_CTRL_REG0" type="a6xx_sp_xs_ctrl_reg0"/>
+       <reg32 offset="0xa870" name="SP_GS_CTRL_REG0" type="a6xx_sp_xs_ctrl_reg0">
+               <!--
+               There is no mergedregs bit, that comes from the previous stage (VS/DS).
+               No idea what this bit does here.
+               -->
+               <bitfield name="UNK20" pos="20" type="boolean"/>
+       </reg32>
        <reg32 offset="0xa871" name="SP_GS_PRIM_SIZE" low="0" high="7" type="uint"/> <!-- size of output of previous stage -->
        <reg32 offset="0xa872" name="SP_GS_BRANCH_COND" type="hex"/>
 
@@ -2955,7 +2953,18 @@ to upconvert to 32b float internally?
 
        <!-- TODO: 4 unknown bool registers 0xa8c0-0xa8c3 -->
 
-       <reg32 offset="0xa980" name="SP_FS_CTRL_REG0" type="a6xx_sp_xs_ctrl_reg0"/>
+       <reg32 offset="0xa980" name="SP_FS_CTRL_REG0" type="a6xx_sp_xs_ctrl_reg0">
+               <bitfield name="THREADSIZE" pos="20" type="a6xx_threadsize"/>
+               <bitfield name="UNK21" pos="21" type="boolean"/>
+               <bitfield name="VARYING" pos="22" type="boolean"/>
+               <bitfield name="DIFF_FINE" pos="23" type="boolean"/>
+               <!-- note: vk blob uses bit24 -->
+               <bitfield name="UNK24" pos="24" type="boolean"/>
+               <bitfield name="UNK25" pos="25" type="boolean"/>
+               <bitfield name="PIXLODENABLE" pos="26" type="boolean"/>
+               <bitfield name="UNK27" low="27" high="28"/>
+               <bitfield name="MERGEDREGS" pos="31" type="boolean"/>
+       </reg32>
        <reg32 offset="0xa981" name="SP_FS_BRANCH_COND" type="hex"/>
        <reg32 offset="0xa982" name="SP_FS_OBJ_FIRST_EXEC_OFFSET" type="uint"/>
        <reg64 offset="0xa983" name="SP_FS_OBJ_START" type="address" align="32"/>
@@ -3058,7 +3067,16 @@ to upconvert to 32b float internally?
 
 
 
-       <reg32 offset="0xa9b0" name="SP_CS_CTRL_REG0" type="a6xx_sp_xs_ctrl_reg0"/>
+       <reg32 offset="0xa9b0" name="SP_CS_CTRL_REG0" type="a6xx_sp_xs_ctrl_reg0">
+               <bitfield name="THREADSIZE" pos="20" type="a6xx_threadsize"/>
+               <!-- seems to make SP use less concurrent threads when possible? -->
+               <bitfield name="UNK21" pos="21" type="boolean"/>
+               <!-- has a small impact on performance, not clear what it does -->
+               <bitfield name="UNK22" pos="22" type="boolean"/>
+               <!-- creates a separate prolog-only thread? -->
+               <bitfield name="SEPARATEPROLOG" pos="23" type="boolean"/>
+               <bitfield name="MERGEDREGS" pos="31" type="boolean"/>
+       </reg32>
 
        <!-- set for compute shaders -->
        <reg32 offset="0xa9b1" name="SP_CS_UNKNOWN_A9B1">
index 4ddd50f..5ffc1f7 100644 (file)
@@ -391,29 +391,63 @@ tu6_emit_xs_config(struct tu_cs *cs,
       return;
    }
 
-   bool is_fs = xs->type == MESA_SHADER_FRAGMENT;
-   enum a6xx_threadsize threadsize = THREAD128;
-
-   /* TODO: We probably should be setting the VS threadsize to 64 if paired
-    * with a GS, and HS + DS threadsize to 64 like freedreno. However this
-    * should probably come from ir3.
-    */
-   if (xs->type == MESA_SHADER_GEOMETRY)
-      threadsize = THREAD64;
-
-   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_ctrl, 1);
-   tu_cs_emit(cs,
-              A6XX_SP_VS_CTRL_REG0_THREADSIZE(threadsize) |
-              A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(xs->info.max_reg + 1) |
-              A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(xs->info.max_half_reg + 1) |
-              COND(xs->mergedregs, A6XX_SP_VS_CTRL_REG0_MERGEDREGS) |
-              A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(xs->branchstack) |
-              COND(xs->need_pixlod, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE) |
-              COND(xs->need_fine_derivatives, A6XX_SP_VS_CTRL_REG0_DIFF_FINE) |
-              /* only fragment shader sets VARYING bit */
-              COND(xs->total_in && is_fs, A6XX_SP_FS_CTRL_REG0_VARYING) |
-              /* unknown bit, seems unnecessary */
-              COND(is_fs, 0x1000000));
+   switch (stage) {
+   case MESA_SHADER_VERTEX:
+      tu_cs_emit_regs(cs, A6XX_SP_VS_CTRL_REG0(
+               .fullregfootprint = xs->info.max_reg + 1,
+               .halfregfootprint = xs->info.max_half_reg + 1,
+               .branchstack = xs->branchstack,
+               .mergedregs = xs->mergedregs,
+      ));
+      break;
+   case MESA_SHADER_TESS_CTRL:
+      tu_cs_emit_regs(cs, A6XX_SP_HS_CTRL_REG0(
+               .fullregfootprint = xs->info.max_reg + 1,
+               .halfregfootprint = xs->info.max_half_reg + 1,
+               .branchstack = xs->branchstack,
+      ));
+      break;
+   case MESA_SHADER_TESS_EVAL:
+      tu_cs_emit_regs(cs, A6XX_SP_DS_CTRL_REG0(
+               .fullregfootprint = xs->info.max_reg + 1,
+               .halfregfootprint = xs->info.max_half_reg + 1,
+               .branchstack = xs->branchstack,
+               .mergedregs = xs->mergedregs,
+      ));
+      break;
+   case MESA_SHADER_GEOMETRY:
+      tu_cs_emit_regs(cs, A6XX_SP_GS_CTRL_REG0(
+               .fullregfootprint = xs->info.max_reg + 1,
+               .halfregfootprint = xs->info.max_half_reg + 1,
+               .branchstack = xs->branchstack,
+      ));
+      break;
+   case MESA_SHADER_FRAGMENT:
+      tu_cs_emit_regs(cs, A6XX_SP_FS_CTRL_REG0(
+               .fullregfootprint = xs->info.max_reg + 1,
+               .halfregfootprint = xs->info.max_half_reg + 1,
+               .branchstack = xs->branchstack,
+               .mergedregs = xs->mergedregs,
+               .threadsize = THREAD128,
+               .pixlodenable = xs->need_pixlod,
+               .diff_fine = xs->need_fine_derivatives,
+               .varying = xs->total_in != 0,
+               /* unknown bit, seems unnecessary */
+               .unk24 = true,
+      ));
+      break;
+   case MESA_SHADER_COMPUTE:
+      tu_cs_emit_regs(cs, A6XX_SP_CS_CTRL_REG0(
+               .fullregfootprint = xs->info.max_reg + 1,
+               .halfregfootprint = xs->info.max_half_reg + 1,
+               .branchstack = xs->branchstack,
+               .mergedregs = xs->mergedregs,
+               .threadsize = THREAD128,
+      ));
+      break;
+   default:
+      unreachable("bad shader stage");
+   }
 
    tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_config, 2);
    tu_cs_emit(cs, A6XX_SP_VS_CONFIG_ENABLED |
index ff1058e..652e239 100644 (file)
@@ -74,8 +74,7 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
                        A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
                        A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
                        COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) |
-                       A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack) |
-                       COND(v->need_pixlod, A6XX_SP_CS_CTRL_REG0_PIXLODENABLE));
+                       A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack));
 
        uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1);
        OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
index f9c4009..b54eee8 100644 (file)
@@ -450,20 +450,12 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
                         COND(fs_has_dual_src_color,
                                        A6XX_SP_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE));
 
-       enum a6xx_threadsize vssz;
-       if (ds || hs) {
-               vssz = THREAD64;
-       } else {
-               vssz = THREAD128;
-       }
-
        OUT_PKT4(ring, REG_A6XX_SP_VS_CTRL_REG0, 1);
-       OUT_RING(ring, A6XX_SP_VS_CTRL_REG0_THREADSIZE(vssz) |
+       OUT_RING(ring,
                        A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vs->info.max_reg + 1) |
                        A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vs->info.max_half_reg + 1) |
                        COND(vs->mergedregs, A6XX_SP_VS_CTRL_REG0_MERGEDREGS) |
-                       A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(vs->branchstack) |
-                       COND(vs->need_pixlod, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE));
+                       A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(vs->branchstack));
 
        fd6_emit_shader(ctx, ring, vs);
        fd6_emit_immediates(ctx->screen, vs, ring);
@@ -577,25 +569,23 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
        }
 
        if (hs) {
+               assert(vs->mergedregs == hs->mergedregs);
                OUT_PKT4(ring, REG_A6XX_SP_HS_CTRL_REG0, 1);
-               OUT_RING(ring, A6XX_SP_HS_CTRL_REG0_THREADSIZE(THREAD64) |
+               OUT_RING(ring,
                        A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(hs->info.max_reg + 1) |
                        A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT(hs->info.max_half_reg + 1) |
-                       COND(hs->mergedregs, A6XX_SP_HS_CTRL_REG0_MERGEDREGS) |
-                       A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(hs->branchstack) |
-                       COND(hs->need_pixlod, A6XX_SP_HS_CTRL_REG0_PIXLODENABLE));
+                       A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(hs->branchstack));
 
                fd6_emit_shader(ctx, ring, hs);
                fd6_emit_immediates(ctx->screen, hs, ring);
                fd6_emit_link_map(ctx->screen, vs, hs, ring);
 
                OUT_PKT4(ring, REG_A6XX_SP_DS_CTRL_REG0, 1);
-               OUT_RING(ring, A6XX_SP_DS_CTRL_REG0_THREADSIZE(THREAD64) |
+               OUT_RING(ring,
                        A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(ds->info.max_reg + 1) |
                        A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT(ds->info.max_half_reg + 1) |
                        COND(ds->mergedregs, A6XX_SP_DS_CTRL_REG0_MERGEDREGS) |
-                       A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(ds->branchstack) |
-                       COND(ds->need_pixlod, A6XX_SP_DS_CTRL_REG0_PIXLODENABLE));
+                       A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(ds->branchstack));
 
                fd6_emit_shader(ctx, ring, ds);
                fd6_emit_immediates(ctx->screen, ds, ring);
@@ -804,13 +794,12 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
                         A6XX_VPC_VS_PACK_STRIDE_IN_VPC(l.max_loc));
 
        if (gs) {
+               assert(gs->mergedregs == (ds ? ds->mergedregs : vs->mergedregs));
                OUT_PKT4(ring, REG_A6XX_SP_GS_CTRL_REG0, 1);
-               OUT_RING(ring, A6XX_SP_GS_CTRL_REG0_THREADSIZE(THREAD64) |
+               OUT_RING(ring,
                        A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT(gs->info.max_reg + 1) |
                        A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT(gs->info.max_half_reg + 1) |
-                       COND(gs->mergedregs, A6XX_SP_GS_CTRL_REG0_MERGEDREGS) |
-                       A6XX_SP_GS_CTRL_REG0_BRANCHSTACK(gs->branchstack) |
-                       COND(gs->need_pixlod, A6XX_SP_GS_CTRL_REG0_PIXLODENABLE));
+                       A6XX_SP_GS_CTRL_REG0_BRANCHSTACK(gs->branchstack));
 
                fd6_emit_shader(ctx, ring, gs);
                fd6_emit_immediates(ctx->screen, gs, ring);