From 79921b81bcf8377c41880b3c15bef163b5e8ff3f Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 10 Sep 2020 14:02:12 +0200 Subject: [PATCH] freedreno/a6xx: Document threadsize-related fields We'll need to use these if we want to start playing around with thread sizes. At least now we know what the actual threadsize is. Part-of: --- src/freedreno/.gitlab-ci/reference/crash.log | 36 +++++++++++----------- ...w.indexed.indirect_draw_count.triangle_list.log | 22 ++++++------- src/freedreno/.gitlab-ci/reference/fd-clouds.log | 28 ++++++++--------- src/freedreno/computerator/a6xx.c | 7 +++-- src/freedreno/registers/adreno/a6xx.xml | 27 +++++++++++++--- src/freedreno/vulkan/tu_pipeline.c | 22 +++++++------ src/gallium/drivers/freedreno/a6xx/fd6_compute.c | 7 +++-- src/gallium/drivers/freedreno/a6xx/fd6_program.c | 21 +++++++------ 8 files changed, 96 insertions(+), 74 deletions(-) diff --git a/src/freedreno/.gitlab-ci/reference/crash.log b/src/freedreno/.gitlab-ci/reference/crash.log index 463fc49..a6c120a 100644 --- a/src/freedreno/.gitlab-ci/reference/crash.log +++ b/src/freedreno/.gitlab-ci/reference/crash.log @@ -6911,7 +6911,7 @@ clusters: 00000000 HLSQ_LOAD_STATE_GEOM_EXT_SRC_ADDR+0x1: 0 - cluster-name: CLUSTER_SP_VS - context: 0 - 00000000 SP_VS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = TWO_QUADS } + 00000000 SP_VS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } 00000000 SP_VS_BRANCH_COND: 0 00000000 SP_VS_PRIMITIVE_CNTL: { OUT = 0 } 00000000 SP_VS_OUT[0].REG: { A_REGID = r0.x | A_COMPMASK = 0 | B_REGID = r0.x | B_COMPMASK = 0 } @@ -6948,7 +6948,7 @@ clusters: 00000080 SP_VS_TEX_COUNT: 128 00000100 SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 } 00000000 SP_VS_INSTRLEN: 0 - 00000000 SP_HS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | 
FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } 00000000 SP_HS_WAVE_INPUT_SIZE: 0 00000000 0xa832: 00000000 00000000 SP_HS_OBJ_FIRST_EXEC_OFFSET: 0 @@ -6961,7 +6961,7 @@ clusters: 00000080 SP_HS_TEX_COUNT: 128 00000000 SP_HS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } 00000000 SP_HS_INSTRLEN: 0 - 00000000 SP_DS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = TWO_QUADS } + 00000000 SP_DS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } 00000000 0xa841: 00000000 00000000 SP_DS_PRIMITIVE_CNTL: { OUT = 0 } 00000000 SP_DS_OUT[0].REG: { A_REGID = r0.x | A_COMPMASK = 0 | B_REGID = r0.x | B_COMPMASK = 0 } @@ -6998,7 +6998,7 @@ clusters: 00000080 SP_DS_TEX_COUNT: 128 00000000 SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } 00000000 SP_DS_INSTRLEN: 0 - 00000000 SP_GS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = TWO_QUADS } + 00000000 SP_GS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } 00000000 SP_GS_PRIM_SIZE: 0 00000000 SP_GS_BRANCH_COND: 0 00000000 SP_GS_PRIMITIVE_CNTL: { OUT = 0 | FLAGS_REGID = r0.x } @@ -7057,7 +7057,7 @@ clusters: 00000000 0xa8c2: 00000000 00000000 0xa8c3: 00000000 - context: 1 - 00000000 SP_VS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = TWO_QUADS } + 00000000 SP_VS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } 00000000 SP_VS_BRANCH_COND: 0 00000000 SP_VS_PRIMITIVE_CNTL: { OUT = 0 } 00000000 SP_VS_OUT[0].REG: { A_REGID = r0.x | A_COMPMASK = 0 | B_REGID = r0.x | B_COMPMASK = 0 } @@ -7094,7 +7094,7 @@ clusters: 00000080 SP_VS_TEX_COUNT: 128 00000100 SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 } 00000000 SP_VS_INSTRLEN: 0 - 00000000 SP_HS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = 
TWO_QUADS } + 00000000 SP_HS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } 00000000 SP_HS_WAVE_INPUT_SIZE: 0 00000000 0xa832: 00000000 00000000 SP_HS_OBJ_FIRST_EXEC_OFFSET: 0 @@ -7107,7 +7107,7 @@ clusters: 00000080 SP_HS_TEX_COUNT: 128 00000000 SP_HS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } 00000000 SP_HS_INSTRLEN: 0 - 00000000 SP_DS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = TWO_QUADS } + 00000000 SP_DS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } 00000000 0xa841: 00000000 00000000 SP_DS_PRIMITIVE_CNTL: { OUT = 0 } 00000000 SP_DS_OUT[0].REG: { A_REGID = r0.x | A_COMPMASK = 0 | B_REGID = r0.x | B_COMPMASK = 0 } @@ -7144,7 +7144,7 @@ clusters: 00000080 SP_DS_TEX_COUNT: 128 00000000 SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } 00000000 SP_DS_INSTRLEN: 0 - 00000000 SP_GS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = TWO_QUADS } + 00000000 SP_GS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } 00000000 SP_GS_PRIM_SIZE: 0 00000000 SP_GS_BRANCH_COND: 0 00000000 SP_GS_PRIMITIVE_CNTL: { OUT = 0 | FLAGS_REGID = r0.x } @@ -7298,7 +7298,7 @@ clusters: deadbeef 0xb382: deadbeef - cluster-name: CLUSTER_SP_PS - context: 0 - 00000001 HLSQ_UNKNOWN_B980: 0x1 + 00000001 HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 } 00000007 HLSQ_CONTROL_1_REG: 0x7 fcfcfcfc HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | SIZE = r63.x } fcfcfcfc HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r63.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x } @@ -7312,8 +7312,8 @@ clusters: 00000000 HLSQ_CS_NDRANGE_4: { GLOBALOFF_Y = 0 } 00000001 HLSQ_CS_NDRANGE_5: { GLOBALSIZE_Z = 1 } 00000000 HLSQ_CS_NDRANGE_6: { GLOBALOFF_Z = 0 } - 00dcd8d4 HLSQ_CS_CNTL_0: { WGIDCONSTID = r53.x | UNK0 = r54.x | 
UNK1 = r55.x | LOCALIDREGID = r0.x } - 000000fc HLSQ_CS_UNKNOWN_B998: 0xfc + 00dcd8d4 HLSQ_CS_CNTL_0: { WGIDCONSTID = r53.x | WGSIZECONSTID = r54.x | WGOFFSETCONSTID = r55.x | LOCALIDREGID = r0.x } + 000000fc HLSQ_CS_CNTL_1: { LINEARLOCALIDREGID = r63.x | THREADSIZE = THREAD64 } 00000002 HLSQ_CS_KERNEL_GROUP_X: 0x2 00000001 HLSQ_CS_KERNEL_GROUP_Y: 0x1 00000001 HLSQ_CS_KERNEL_GROUP_Z: 0x1 @@ -7331,7 +7331,7 @@ clusters: 00000000 HLSQ_CS_BINDLESS_BASE[0x4].ADDR: 0 00000000 HLSQ_CS_BINDLESS_BASE[0x4].ADDR+0x1: 0 - context: 1 - 00000001 HLSQ_UNKNOWN_B980: 0x1 + 00000001 HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 } 00000007 HLSQ_CONTROL_1_REG: 0x7 fcfcfcfc HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | SIZE = r63.x } fcfcfcfc HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r63.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x } @@ -7345,8 +7345,8 @@ clusters: 00000000 HLSQ_CS_NDRANGE_4: { GLOBALOFF_Y = 0 } 00000001 HLSQ_CS_NDRANGE_5: { GLOBALSIZE_Z = 1 } 00000000 HLSQ_CS_NDRANGE_6: { GLOBALOFF_Z = 0 } - 00dcd8d4 HLSQ_CS_CNTL_0: { WGIDCONSTID = r53.x | UNK0 = r54.x | UNK1 = r55.x | LOCALIDREGID = r0.x } - 000000fc HLSQ_CS_UNKNOWN_B998: 0xfc + 00dcd8d4 HLSQ_CS_CNTL_0: { WGIDCONSTID = r53.x | WGSIZECONSTID = r54.x | WGOFFSETCONSTID = r55.x | LOCALIDREGID = r0.x } + 000000fc HLSQ_CS_CNTL_1: { LINEARLOCALIDREGID = r63.x | THREADSIZE = THREAD64 } 00000002 HLSQ_CS_KERNEL_GROUP_X: 0x2 00000001 HLSQ_CS_KERNEL_GROUP_Y: 0x1 00000001 HLSQ_CS_KERNEL_GROUP_Z: 0x1 @@ -7370,7 +7370,7 @@ clusters: deadbeef HLSQ_2D_EVENT_CMD: { STATE_ID = 0xbe | EVENT = 0x6f | 0xdead0080 } - cluster-name: CLUSTER_SP_PS - context: 0 - 05100000 SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = FOUR_QUADS | PIXLODENABLE | 0x1000000 } + 05100000 SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | PIXLODENABLE | 0x1000000 } 00000000 SP_FS_BRANCH_COND: 
0 00000000 SP_FS_OBJ_FIRST_EXEC_OFFSET: 0 4bdb43d8 SP_FS_OBJ_START_LO: 0x4bdb43d8 @@ -7411,7 +7411,7 @@ clusters: 00000000 SP_FS_BINDLESS_PREFETCH[0x3].CMD: { SAMP_ID = 0 | TEX_ID = 0 } 00000080 SP_FS_TEX_COUNT: 128 0000f000 SP_UNKNOWN_A9A8: 0xf000 - 00421800 SP_CS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 | THREADSIZE = TWO_QUADS | VARYING } + 00421800 SP_CS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 | THREADSIZE = THREAD64 | VARYING } 0000001f SP_CS_UNKNOWN_A9B1: 31 00000000 0xa9b2: 00000000 00000000 SP_CS_OBJ_FIRST_EXEC_OFFSET: 0 @@ -7452,7 +7452,7 @@ clusters: 00000000 0xaa30: 00000000 00000000 0xaa31: 00000000 - context: 1 - 05100000 SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = FOUR_QUADS | PIXLODENABLE | 0x1000000 } + 05100000 SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | PIXLODENABLE | 0x1000000 } 00000000 SP_FS_BRANCH_COND: 0 00000000 SP_FS_OBJ_FIRST_EXEC_OFFSET: 0 4bdb43d8 SP_FS_OBJ_START_LO: 0x4bdb43d8 @@ -7493,7 +7493,7 @@ clusters: 00000000 SP_FS_BINDLESS_PREFETCH[0x3].CMD: { SAMP_ID = 0 | TEX_ID = 0 } 00000080 SP_FS_TEX_COUNT: 128 0000f000 SP_UNKNOWN_A9A8: 0xf000 - 00421800 SP_CS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 | THREADSIZE = TWO_QUADS | VARYING } + 00421800 SP_CS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 | THREADSIZE = THREAD64 | VARYING } 0000001f SP_CS_UNKNOWN_A9B1: 31 00000000 0xa9b2: 00000000 00000000 SP_CS_OBJ_FIRST_EXEC_OFFSET: 0 diff --git a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log index 9c09f98..4dd5f1f 100644 --- 
a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log +++ b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log @@ -195,10 +195,10 @@ t7 opcode: CP_SET_DRAW_STATE (43) (4 dwords) { ADDR_HI = 0 } 00000000010581ec: 0000: 70438003 00040000 00000000 00000000 t4 write SP_HS_CTRL_REG0 (a830) - SP_HS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = TWO_QUADS } + SP_HS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } 00000000010581fc: 0000: 40a83001 00000000 t4 write SP_GS_CTRL_REG0 (a870) - SP_GS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = TWO_QUADS } + SP_GS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } 0000000001058204: 0000: 48a87001 00000000 t4 write GRAS_LRZ_CNTL (8100) GRAS_LRZ_CNTL: { 0 } @@ -341,8 +341,8 @@ t7 opcode: CP_BLIT (2c) (2 dwords) + 00000000 VFD_MULTIVIEW_CNTL: { VIEWS = 0 } !+ 00000001 VFD_ADD_OFFSET: { VERTEX } + 00000000 SP_VS_OBJ_FIRST_EXEC_OFFSET: 0 - + 00000000 SP_HS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = TWO_QUADS } - + 00000000 SP_GS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = TWO_QUADS } + + 00000000 SP_HS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } + + 00000000 SP_GS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } + 00000000 SP_FS_OBJ_FIRST_EXEC_OFFSET: 0 + 00000000 SP_UNKNOWN_A9A8: 0 !+ 00000005 SP_MODE_CONTROL: { CONSTANT_DEMOTION_ENABLE | 0x4 } @@ -783,7 +783,7 @@ t4 write HLSQ_INVALIDATE_CMD (bb08) HLSQ_INVALIDATE_CMD: { VS_STATE | HS_STATE | DS_STATE | GS_STATE | FS_STATE | GFX_IBO | CS_BINDLESS = 0 | GFX_BINDLESS = 0 } 
0000000001054180: 0000: 40bb0801 0000009f t4 write SP_VS_CTRL_REG0 (a800) - SP_VS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 3 | BRANCHSTACK = 0 | THREADSIZE = FOUR_QUADS | MERGEDREGS } + SP_VS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 3 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | MERGEDREGS } 0000000001054188: 0000: 40a80001 80100180 t4 write SP_VS_CONFIG (a823) SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 } @@ -863,7 +863,7 @@ t4 write HLSQ_GS_CNTL (b803) HLSQ_GS_CNTL: { CONSTLEN = 0 } 0000000001054208: 0000: 48b80301 00000000 t4 write SP_FS_CTRL_REG0 (a980) - SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 2 | BRANCHSTACK = 0 | THREADSIZE = FOUR_QUADS | VARYING | MERGEDREGS | 0x1000000 } + SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 2 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | VARYING | MERGEDREGS | 0x1000000 } 0000000001054210: 0000: 40a98001 81500100 t4 write SP_FS_CONFIG (ab04) SP_FS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 } @@ -1006,8 +1006,8 @@ t4 write HLSQ_CONTROL_1_REG (b982) HLSQ_CONTROL_4_REG: { IJ_PERSP_SAMPLE = r63.x | IJ_LINEAR_SAMPLE = r63.x | XYCOORDREGID = r63.x | ZWCOORDREGID = r63.x } HLSQ_CONTROL_5_REG: 0xfc 000000000105434c: 0000: 40b98285 00000007 fcfcfcfc fcfcfc00 fcfcfcfc 000000fc -t4 write HLSQ_UNKNOWN_B980 (b980) - HLSQ_UNKNOWN_B980: 0x3 +t4 write HLSQ_FS_CNTL_0 (b980) + HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 | VARYINGS } 0000000001054364: 0000: 48b98001 00000003 t4 write GRAS_CNTL (8005) GRAS_CNTL: { IJ_PERSP_PIXEL | COORD_MASK = 0 } @@ -1447,7 +1447,7 @@ t7 opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords) !+ 0000000f VFD_DEST_CNTL[0].INSTR: { WRITEMASK = 0xf | REGID = r0.x } !+ 0000004f VFD_DEST_CNTL[0x1].INSTR: { WRITEMASK = 0xf | REGID = r1.x } !+ 00000081 VFD_DEST_CNTL[0x2].INSTR: { WRITEMASK = 0x1 | REGID = r2.x } -!+ 80100180 SP_VS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 3 | BRANCHSTACK = 0 | THREADSIZE = FOUR_QUADS | MERGEDREGS } 
+!+ 80100180 SP_VS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 3 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | MERGEDREGS } !+ 00000002 SP_VS_PRIMITIVE_CNTL: { OUT = 2 } !+ 0f000f08 SP_VS_OUT[0].REG: { A_REGID = r2.x | A_COMPMASK = 0xf | B_REGID = r0.x | B_COMPMASK = 0xf } !+ 00000400 SP_VS_VPC_DST[0].REG: { OUTLOC0 = 0 | OUTLOC1 = 4 | OUTLOC2 = 0 | OUTLOC3 = 0 } @@ -1480,7 +1480,7 @@ t7 opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords) + 00000000 SP_HS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } + 00000000 SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } + 00000000 SP_GS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } -!+ 81500100 SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 2 | BRANCHSTACK = 0 | THREADSIZE = FOUR_QUADS | VARYING | MERGEDREGS | 0x1000000 } +!+ 81500100 SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 2 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | VARYING | MERGEDREGS | 0x1000000 } !+ 01054080 SP_FS_OBJ_START_LO: 0x1054080 base=1054000, offset=128, size=12288 + 00000000 SP_FS_OBJ_START_HI: 0 base=1054000, offset=128, size=12288 0000000001054080: 0000: 00002000 47300002 00002001 47300003 00002002 47300004 00002003 47308005 @@ -1527,7 +1527,7 @@ t7 opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords) + 00000000 HLSQ_HS_CNTL: { CONSTLEN = 0 } + 00000000 HLSQ_DS_CNTL: { CONSTLEN = 0 } + 00000000 HLSQ_GS_CNTL: { CONSTLEN = 0 } -!+ 00000003 HLSQ_UNKNOWN_B980: 0x3 +!+ 00000003 HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 | VARYINGS } !+ 00000007 HLSQ_CONTROL_1_REG: 0x7 !+ fcfcfcfc HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | SIZE = r63.x } !+ fcfcfc00 HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r0.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x } diff --git a/src/freedreno/.gitlab-ci/reference/fd-clouds.log b/src/freedreno/.gitlab-ci/reference/fd-clouds.log index 74dcc31..6ac06ff 100644 --- a/src/freedreno/.gitlab-ci/reference/fd-clouds.log +++ 
b/src/freedreno/.gitlab-ci/reference/fd-clouds.log @@ -619,7 +619,7 @@ t4 write SP_FS_OUTPUT_CNTL0 (a98c) SP_FS_OUTPUT_CNTL0: { DEPTH_REGID = r63.x | SAMPMASK_REGID = r63.x | STENCILREF_REGID = r63.x } 0000000001121020: 0000: 40a98c01 fcfcfc00 t4 write SP_VS_CTRL_REG0 (a800) - SP_VS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = FOUR_QUADS | MERGEDREGS } + SP_VS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | MERGEDREGS } 0000000001121028: 0000: 40a80001 80100080 t4 write SP_VS_INSTRLEN (a824) SP_VS_INSTRLEN: 1 @@ -689,11 +689,11 @@ t4 write HLSQ_CONTROL_1_REG (b982) HLSQ_CONTROL_4_REG: { IJ_PERSP_SAMPLE = r63.x | IJ_LINEAR_SAMPLE = r63.x | XYCOORDREGID = r63.x | ZWCOORDREGID = r63.x } HLSQ_CONTROL_5_REG: 0xfc 00000000011210a0: 0000: 40b98285 00000007 fcfcfcfc fcfcfcfc fcfcfcfc 000000fc -t4 write HLSQ_UNKNOWN_B980 (b980) - HLSQ_UNKNOWN_B980: 0x1 +t4 write HLSQ_FS_CNTL_0 (b980) + HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 } 00000000011210b8: 0000: 48b98001 00000001 t4 write SP_FS_CTRL_REG0 (a980) - SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = FOUR_QUADS | MERGEDREGS | 0x1000000 } + SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | MERGEDREGS | 0x1000000 } 00000000011210c0: 0000: 40a98001 81100080 t4 write SP_FS_OBJ_FIRST_EXEC_OFFSET (a982) SP_FS_OBJ_FIRST_EXEC_OFFSET: 0 @@ -1076,7 +1076,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) !+ 00000001 VFD_DECODE[0].STEP_RATE: 0x1 !+ 0000000f VFD_DEST_CNTL[0].INSTR: { WRITEMASK = 0xf | REGID = r0.x } !+ 00000001 SP_UNKNOWN_A0F8: 0x1 -!+ 80100080 SP_VS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = FOUR_QUADS | MERGEDREGS } +!+ 80100080 SP_VS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | MERGEDREGS } !+ 00000001 
SP_VS_PRIMITIVE_CNTL: { OUT = 1 } !+ 00000f00 SP_VS_OUT[0].REG: { A_REGID = r0.x | A_COMPMASK = 0xf | B_REGID = r0.x | B_COMPMASK = 0 } + 00000000 SP_VS_VPC_DST[0].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 } @@ -1103,7 +1103,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) + 00000000 SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } + 00000000 SP_GS_PRIM_SIZE: 0 + 00000000 SP_GS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } -!+ 81100080 SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = FOUR_QUADS | MERGEDREGS | 0x1000000 } +!+ 81100080 SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | MERGEDREGS | 0x1000000 } + 00000000 SP_FS_OBJ_FIRST_EXEC_OFFSET: 0 + 00000000 SP_SRGB_CNTL: { 0 } !+ 0000000f SP_FS_RENDER_COMPONENTS: { RT0 = 0xf | RT1 = 0 | RT2 = 0 | RT3 = 0 | RT4 = 0 | RT5 = 0 | RT6 = 0 | RT7 = 0 } @@ -1139,7 +1139,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) + 00000000 HLSQ_HS_CNTL: { CONSTLEN = 0 } + 00000000 HLSQ_DS_CNTL: { CONSTLEN = 0 } + 00000000 HLSQ_GS_CNTL: { CONSTLEN = 0 } -!+ 00000001 HLSQ_UNKNOWN_B980: 0x1 +!+ 00000001 HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 } !+ 00000007 HLSQ_CONTROL_1_REG: 0x7 !+ fcfcfcfc HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | SIZE = r63.x } !+ fcfcfcfc HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r63.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x } @@ -1913,7 +1913,7 @@ t4 write SP_FS_OUTPUT_CNTL0 (a98c) SP_FS_OUTPUT_CNTL0: { DEPTH_REGID = r63.x | SAMPMASK_REGID = r63.x | STENCILREF_REGID = r63.x } 0000000001120020: 0000: 40a98c01 fcfcfc00 t4 write SP_VS_CTRL_REG0 (a800) - SP_VS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = FOUR_QUADS | MERGEDREGS } + SP_VS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | MERGEDREGS } 
0000000001120028: 0000: 40a80001 80100080 t4 write SP_VS_INSTRLEN (a824) SP_VS_INSTRLEN: 1 @@ -1983,11 +1983,11 @@ t4 write HLSQ_CONTROL_1_REG (b982) HLSQ_CONTROL_4_REG: { IJ_PERSP_SAMPLE = r63.x | IJ_LINEAR_SAMPLE = r63.x | XYCOORDREGID = r4.w | ZWCOORDREGID = r5.y } HLSQ_CONTROL_5_REG: 0xfc 00000000011200a0: 0000: 40b98285 00000007 fcfcfcfc fcfcfcfc 1513fcfc 000000fc -t4 write HLSQ_UNKNOWN_B980 (b980) - HLSQ_UNKNOWN_B980: 0x1 +t4 write HLSQ_FS_CNTL_0 (b980) + HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 } 00000000011200b8: 0000: 48b98001 00000001 t4 write SP_FS_CTRL_REG0 (a980) - SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 19 | BRANCHSTACK = 2 | THREADSIZE = FOUR_QUADS | VARYING | MERGEDREGS | 0x1000000 } + SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 19 | BRANCHSTACK = 2 | THREADSIZE = THREAD128 | VARYING | MERGEDREGS | 0x1000000 } 00000000011200c0: 0000: 40a98001 81508980 t4 write SP_FS_OBJ_FIRST_EXEC_OFFSET (a982) SP_FS_OBJ_FIRST_EXEC_OFFSET: 0 @@ -5273,7 +5273,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) + c7400000 VFD_DECODE[0].INSTR: { IDX = 0 | OFFSET = 0 | FORMAT = FMT6_32_32_32_FLOAT | SWAP = WZYX | UNK30 | FLOAT } + 00000001 VFD_DECODE[0].STEP_RATE: 0x1 + 0000000f VFD_DEST_CNTL[0].INSTR: { WRITEMASK = 0xf | REGID = r0.x } - + 80100080 SP_VS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = FOUR_QUADS | MERGEDREGS } + + 80100080 SP_VS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | MERGEDREGS } + 00000001 SP_VS_PRIMITIVE_CNTL: { OUT = 1 } + 00000f00 SP_VS_OUT[0].REG: { A_REGID = r0.x | A_COMPMASK = 0xf | B_REGID = r0.x | B_COMPMASK = 0 } + 00000000 SP_VS_VPC_DST[0].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 } @@ -5299,7 +5299,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) + 00000000 SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } + 00000000 SP_GS_PRIM_SIZE: 0 + 00000000 SP_GS_CONFIG: { NTEX = 0 | NSAMP 
= 0 | NIBO = 0 } -!+ 81508980 SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 19 | BRANCHSTACK = 2 | THREADSIZE = FOUR_QUADS | VARYING | MERGEDREGS | 0x1000000 } +!+ 81508980 SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 19 | BRANCHSTACK = 2 | THREADSIZE = THREAD128 | VARYING | MERGEDREGS | 0x1000000 } + 00000000 SP_FS_OBJ_FIRST_EXEC_OFFSET: 0 !+ 01013000 SP_FS_OBJ_START_LO: 0x1013000 base=1013000, offset=0, size=11264 + 00000000 SP_FS_OBJ_START_HI: 0 base=1013000, offset=0, size=11264 @@ -6748,7 +6748,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) + 00000000 HLSQ_HS_CNTL: { CONSTLEN = 0 } + 00000000 HLSQ_DS_CNTL: { CONSTLEN = 0 } + 00000000 HLSQ_GS_CNTL: { CONSTLEN = 0 } - + 00000001 HLSQ_UNKNOWN_B980: 0x1 + + 00000001 HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 } + 00000007 HLSQ_CONTROL_1_REG: 0x7 + fcfcfcfc HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | SIZE = r63.x } + fcfcfcfc HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r63.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x } diff --git a/src/freedreno/computerator/a6xx.c b/src/freedreno/computerator/a6xx.c index bf2b926..6b23780 100644 --- a/src/freedreno/computerator/a6xx.c +++ b/src/freedreno/computerator/a6xx.c @@ -159,10 +159,11 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel) OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL_0, 2); OUT_RING(ring, A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID(work_group_id) | - A6XX_HLSQ_CS_CNTL_0_UNK0(regid(63, 0)) | - A6XX_HLSQ_CS_CNTL_0_UNK1(regid(63, 0)) | + A6XX_HLSQ_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) | + A6XX_HLSQ_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) | A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id)); - OUT_RING(ring, 0x2fc); /* HLSQ_CS_UNKNOWN_B998 */ + OUT_RING(ring, A6XX_HLSQ_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) | + A6XX_HLSQ_CS_CNTL_1_THREADSIZE(THREAD128)); OUT_PKT4(ring, REG_A6XX_SP_CS_OBJ_START_LO, 2); OUT_RELOC(ring, v->bo, 0, 0, 0); /* 
SP_CS_OBJ_START_LO/HI */ diff --git a/src/freedreno/registers/adreno/a6xx.xml b/src/freedreno/registers/adreno/a6xx.xml index 61a73cb..19364ac 100644 --- a/src/freedreno/registers/adreno/a6xx.xml +++ b/src/freedreno/registers/adreno/a6xx.xml @@ -3066,6 +3066,15 @@ to upconvert to 32b float internally? + + + + + + - + @@ -3622,7 +3631,11 @@ to upconvert to 32b float internally? - + + + + + @@ -3684,11 +3697,15 @@ to upconvert to 32b float internally? - - + + - + + + + + diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 0e1b6fd..3135311 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -391,14 +391,14 @@ tu6_emit_xs_config(struct tu_cs *cs, } bool is_fs = xs->type == MESA_SHADER_FRAGMENT; - enum a3xx_threadsize threadsize = FOUR_QUADS; + enum a6xx_threadsize threadsize = THREAD128; - /* TODO: - * the "threadsize" field may have nothing to do with threadsize, - * use a value that matches the blob until it is figured out + /* TODO: We probably should be setting the VS threadsize to 64 if paired + * with a GS, and HS + DS threadsize to 64 like freedreno. However this + * should probably come from ir3. 
*/ if (xs->type == MESA_SHADER_GEOMETRY) - threadsize = TWO_QUADS; + threadsize = THREAD64; tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_ctrl, 1); tu_cs_emit(cs, @@ -543,10 +543,11 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader, tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CS_CNTL_0, 2); tu_cs_emit(cs, A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID(work_group_id) | - A6XX_HLSQ_CS_CNTL_0_UNK0(regid(63, 0)) | - A6XX_HLSQ_CS_CNTL_0_UNK1(regid(63, 0)) | + A6XX_HLSQ_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) | + A6XX_HLSQ_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) | A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id)); - tu_cs_emit(cs, 0x2fc); /* HLSQ_CS_UNKNOWN_B998 */ + tu_cs_emit(cs, A6XX_HLSQ_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) | + A6XX_HLSQ_CS_CNTL_1_THREADSIZE(THREAD128)); } static void @@ -1266,8 +1267,9 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs) A6XX_HLSQ_CONTROL_4_REG_IJ_LINEAR_SAMPLE(ij_regid[IJ_LINEAR_SAMPLE])); tu_cs_emit(cs, 0xfc); - tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UNKNOWN_B980, 1); - tu_cs_emit(cs, enable_varyings ? 
3 : 1); + tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_FS_CNTL_0, 1); + tu_cs_emit(cs, A6XX_HLSQ_FS_CNTL_0_THREADSIZE(THREAD128) | + COND(enable_varyings, A6XX_HLSQ_FS_CNTL_0_VARYINGS)); bool need_size = fs->frag_face || fs->fragcoord_compmask != 0; bool need_size_persamp = false; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c index 4a74fc2..1a1d260 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c @@ -86,10 +86,11 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL_0, 2); OUT_RING(ring, A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID(work_group_id) | - A6XX_HLSQ_CS_CNTL_0_UNK0(regid(63, 0)) | - A6XX_HLSQ_CS_CNTL_0_UNK1(regid(63, 0)) | + A6XX_HLSQ_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) | + A6XX_HLSQ_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) | A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id)); - OUT_RING(ring, 0x2fc); /* HLSQ_CS_UNKNOWN_B998 */ + OUT_RING(ring, A6XX_HLSQ_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) | + A6XX_HLSQ_CS_CNTL_1_THREADSIZE(THREAD128)); OUT_PKT4(ring, REG_A6XX_SP_CS_OBJ_START_LO, 2); OUT_RELOC(ring, v->bo, 0, 0, 0); /* SP_CS_OBJ_START_LO/HI */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index fca1ab9..125da7d 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -312,7 +312,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, uint32_t tess_coord_x_regid, tess_coord_y_regid, hs_patch_regid, ds_patch_regid; uint32_t ij_regid[IJ_COUNT]; uint32_t gs_header_regid; - enum a3xx_threadsize fssz; + enum a6xx_threadsize fssz; uint8_t psize_loc = ~0, pos_loc = ~0, layer_loc = ~0; uint8_t clip0_loc, clip1_loc; int i, j; @@ -332,7 +332,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, bool sample_shading = fs->per_samp | 
key->sample_shading; - fssz = FOUR_QUADS; + fssz = THREAD128; pos_regid = ir3_find_output_regid(vs, VARYING_SLOT_POS); psize_regid = ir3_find_output_regid(vs, VARYING_SLOT_PSIZ); @@ -450,11 +450,11 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, COND(fs_has_dual_src_color, A6XX_SP_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE)); - enum a3xx_threadsize vssz; + enum a6xx_threadsize vssz; if (ds || hs) { - vssz = TWO_QUADS; + vssz = THREAD64; } else { - vssz = FOUR_QUADS; + vssz = THREAD128; } OUT_PKT4(ring, REG_A6XX_SP_VS_CTRL_REG0, 1); @@ -578,7 +578,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, if (hs) { OUT_PKT4(ring, REG_A6XX_SP_HS_CTRL_REG0, 1); - OUT_RING(ring, A6XX_SP_HS_CTRL_REG0_THREADSIZE(TWO_QUADS) | + OUT_RING(ring, A6XX_SP_HS_CTRL_REG0_THREADSIZE(THREAD64) | A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(hs->info.max_reg + 1) | A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT(hs->info.max_half_reg + 1) | COND(hs->mergedregs, A6XX_SP_HS_CTRL_REG0_MERGEDREGS) | @@ -590,7 +590,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, fd6_emit_link_map(ctx->screen, vs, hs, ring); OUT_PKT4(ring, REG_A6XX_SP_DS_CTRL_REG0, 1); - OUT_RING(ring, A6XX_SP_DS_CTRL_REG0_THREADSIZE(TWO_QUADS) | + OUT_RING(ring, A6XX_SP_DS_CTRL_REG0_THREADSIZE(THREAD64) | A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(ds->info.max_reg + 1) | A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT(ds->info.max_half_reg + 1) | COND(ds->mergedregs, A6XX_SP_DS_CTRL_REG0_MERGEDREGS) | @@ -718,8 +718,9 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, A6XX_HLSQ_CONTROL_4_REG_IJ_LINEAR_SAMPLE(ij_regid[IJ_LINEAR_SAMPLE])); OUT_RING(ring, 0xfc); /* XXX */ - OUT_PKT4(ring, REG_A6XX_HLSQ_UNKNOWN_B980, 1); - OUT_RING(ring, enable_varyings ? 
3 : 1); + OUT_PKT4(ring, REG_A6XX_HLSQ_FS_CNTL_0, 1); + OUT_RING(ring, A6XX_HLSQ_FS_CNTL_0_THREADSIZE(THREAD128) | + COND(enable_varyings, A6XX_HLSQ_FS_CNTL_0_VARYINGS)); OUT_PKT4(ring, REG_A6XX_SP_FS_CTRL_REG0, 1); OUT_RING(ring, A6XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) | @@ -804,7 +805,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, if (gs) { OUT_PKT4(ring, REG_A6XX_SP_GS_CTRL_REG0, 1); - OUT_RING(ring, A6XX_SP_GS_CTRL_REG0_THREADSIZE(TWO_QUADS) | + OUT_RING(ring, A6XX_SP_GS_CTRL_REG0_THREADSIZE(THREAD64) | A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT(gs->info.max_reg + 1) | A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT(gs->info.max_half_reg + 1) | COND(gs->mergedregs, A6XX_SP_GS_CTRL_REG0_MERGEDREGS) | -- 2.7.4