From ee1f140fd9b295f2494606f803b4420bf752c112 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Tue, 9 Mar 2021 15:53:06 +0100 Subject: [PATCH] freedreno/a6xx: Cleanup SP_XS_CTRL_REG0 definitions The registers were actually different per-stage even though we used the same type, which resulted in a bunch of incorrectly programmed fields and confusion. Move the stage-specific values to the registers themselves, which makes things much less confusing and makes it possible to set "mergedregs" correctly. Part-of: --- src/freedreno/.gitlab-ci/reference/crash.log | 24 +++---- ...w.indexed.indirect_draw_count.triangle_list.log | 16 ++--- src/freedreno/.gitlab-ci/reference/fd-clouds.log | 16 ++--- src/freedreno/computerator/a6xx.c | 3 +- src/freedreno/registers/adreno/a6xx.xml | 72 +++++++++++-------- src/freedreno/vulkan/tu_pipeline.c | 80 +++++++++++++++------- src/gallium/drivers/freedreno/a6xx/fd6_compute.c | 3 +- src/gallium/drivers/freedreno/a6xx/fd6_program.c | 31 +++------ 8 files changed, 142 insertions(+), 103 deletions(-) diff --git a/src/freedreno/.gitlab-ci/reference/crash.log b/src/freedreno/.gitlab-ci/reference/crash.log index 8389724..a77a40d 100644 --- a/src/freedreno/.gitlab-ci/reference/crash.log +++ b/src/freedreno/.gitlab-ci/reference/crash.log @@ -6911,7 +6911,7 @@ clusters: 00000000 HLSQ_LOAD_STATE_GEOM_EXT_SRC_ADDR_HI: 0 - cluster-name: CLUSTER_SP_VS - context: 0 - 00000000 SP_VS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } + 00000000 SP_VS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 } 00000000 SP_VS_BRANCH_COND: 0 00000000 SP_VS_PRIMITIVE_CNTL: { OUT = 0 | FLAGS_REGID = r0.x } 00000000 SP_VS_OUT[0].REG: { A_REGID = r0.x | A_COMPMASK = 0 | B_REGID = r0.x | B_COMPMASK = 0 } @@ -6948,7 +6948,7 @@ clusters: 00000080 SP_VS_TEX_COUNT: 128 00000100 SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 } 00000000 SP_VS_INSTRLEN: 0 - 00000000 SP_HS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } + 00000000 SP_HS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 } 00000000 SP_HS_WAVE_INPUT_SIZE: 0 00000000 SP_HS_BRANCH_COND: 0 00000000 SP_HS_OBJ_FIRST_EXEC_OFFSET: 0 @@ -6961,7 +6961,7 @@ clusters: 00000080 SP_HS_TEX_COUNT: 128 00000000 SP_HS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } 00000000 SP_HS_INSTRLEN: 0 - 00000000 SP_DS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } + 00000000 SP_DS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 } 00000000 SP_DS_BRANCH_COND: 0 00000000 SP_DS_PRIMITIVE_CNTL: { OUT = 0 | FLAGS_REGID = r0.x } 00000000 SP_DS_OUT[0].REG: { A_REGID = r0.x | A_COMPMASK = 0 | B_REGID = r0.x | B_COMPMASK = 0 } @@ -6998,7 +6998,7 @@ clusters: 00000080 SP_DS_TEX_COUNT: 128 00000000 SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } 00000000 SP_DS_INSTRLEN: 0 - 00000000 SP_GS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } + 00000000 SP_GS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 } 00000000 SP_GS_PRIM_SIZE: 0 00000000 SP_GS_BRANCH_COND: 0 00000000 SP_GS_PRIMITIVE_CNTL: { OUT = 0 | FLAGS_REGID = r0.x } @@ -7057,7 +7057,7 @@ clusters: 00000000 0xa8c2: 00000000 00000000 0xa8c3: 00000000 - context: 1 - 00000000 SP_VS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } + 00000000 SP_VS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 } 00000000 SP_VS_BRANCH_COND: 0 00000000 SP_VS_PRIMITIVE_CNTL: { OUT = 0 | FLAGS_REGID = r0.x } 00000000 SP_VS_OUT[0].REG: { A_REGID = r0.x | A_COMPMASK = 0 | B_REGID = r0.x | B_COMPMASK = 0 } @@ -7094,7 +7094,7 @@ clusters: 00000080 SP_VS_TEX_COUNT: 128 00000100 SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 } 00000000 SP_VS_INSTRLEN: 0 - 00000000 SP_HS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } + 00000000 SP_HS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 } 00000000 SP_HS_WAVE_INPUT_SIZE: 0 00000000 SP_HS_BRANCH_COND: 0 00000000 SP_HS_OBJ_FIRST_EXEC_OFFSET: 0 @@ -7107,7 +7107,7 @@ clusters: 00000080 SP_HS_TEX_COUNT: 128 00000000 SP_HS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } 00000000 SP_HS_INSTRLEN: 0 - 00000000 SP_DS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } + 00000000 SP_DS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 } 00000000 SP_DS_BRANCH_COND: 0 00000000 SP_DS_PRIMITIVE_CNTL: { OUT = 0 | FLAGS_REGID = r0.x } 00000000 SP_DS_OUT[0].REG: { A_REGID = r0.x | A_COMPMASK = 0 | B_REGID = r0.x | B_COMPMASK = 0 } @@ -7144,7 +7144,7 @@ clusters: 00000080 SP_DS_TEX_COUNT: 128 00000000 SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } 00000000 SP_DS_INSTRLEN: 0 - 00000000 SP_GS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } + 00000000 SP_GS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 } 00000000 SP_GS_PRIM_SIZE: 0 00000000 SP_GS_BRANCH_COND: 0 00000000 SP_GS_PRIMITIVE_CNTL: { OUT = 0 | FLAGS_REGID = r0.x } @@ -7370,7 +7370,7 @@ clusters: deadbeef HLSQ_2D_EVENT_CMD: { STATE_ID = 0xbe | EVENT = 0x6f | 0xdead0080 } - cluster-name: CLUSTER_SP_PS - context: 0 - 05100000 SP_FS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | UNK24 | PIXLODENABLE } + 05100000 SP_FS_CTRL_REG0: { THREADSIZE = THREAD128 | UNK24 | PIXLODENABLE | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 } 00000000 SP_FS_BRANCH_COND: 0 00000000 SP_FS_OBJ_FIRST_EXEC_OFFSET: 0 4bdb43d8 SP_FS_OBJ_START: 0x4bdb43d8 @@ -7411,7 +7411,7 @@ clusters: 00000000 SP_FS_BINDLESS_PREFETCH[0x3].CMD: { SAMP_ID = 0 | TEX_ID = 0 } 00000080 SP_FS_TEX_COUNT: 128 0000f000 SP_UNKNOWN_A9A8: 0xf000 - 00421800 SP_CS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 | THREADSIZE = THREAD64 | VARYING } + 00421800 SP_CS_CTRL_REG0: { THREADSIZE = THREAD64 | UNK22 | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 } 0000001f SP_CS_UNKNOWN_A9B1: { SHARED_SIZE = 31 } 00000000 SP_CS_BRANCH_COND: 0 00000000 SP_CS_OBJ_FIRST_EXEC_OFFSET: 0 @@ -7452,7 +7452,7 @@ clusters: 00000000 0xaa30: 00000000 00000000 0xaa31: 00000000 - context: 1 - 05100000 SP_FS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | UNK24 | PIXLODENABLE } + 05100000 SP_FS_CTRL_REG0: { THREADSIZE = THREAD128 | UNK24 | PIXLODENABLE | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 } 00000000 SP_FS_BRANCH_COND: 0 00000000 SP_FS_OBJ_FIRST_EXEC_OFFSET: 0 4bdb43d8 SP_FS_OBJ_START: 0x4bdb43d8 @@ -7493,7 +7493,7 @@ clusters: 00000000 SP_FS_BINDLESS_PREFETCH[0x3].CMD: { SAMP_ID = 0 | TEX_ID = 0 } 00000080 SP_FS_TEX_COUNT: 128 0000f000 SP_UNKNOWN_A9A8: 0xf000 - 00421800 SP_CS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 | THREADSIZE = THREAD64 | VARYING } + 00421800 SP_CS_CTRL_REG0: { THREADSIZE = THREAD64 | UNK22 | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 } 0000001f SP_CS_UNKNOWN_A9B1: { SHARED_SIZE = 31 } 00000000 SP_CS_BRANCH_COND: 0 00000000 SP_CS_OBJ_FIRST_EXEC_OFFSET: 0 diff --git a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log index 00938a1..f4d98cb 100644 --- a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log +++ b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log @@ -195,10 +195,10 @@ t7 opcode: CP_SET_DRAW_STATE (43) (4 dwords) { ADDR_HI = 0 } 00000000010581ec: 0000: 70438003 00040000 00000000 00000000 t4 write SP_HS_CTRL_REG0 (a830) - SP_HS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } + SP_HS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 } 00000000010581fc: 0000: 40a83001 00000000 t4 write SP_GS_CTRL_REG0 (a870) - SP_GS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } + SP_GS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 } 0000000001058204: 0000: 48a87001 00000000 t4 write GRAS_LRZ_CNTL (8100) GRAS_LRZ_CNTL: { 0 } @@ -341,8 +341,8 @@ t7 opcode: CP_BLIT (2c) (2 dwords) + 00000000 VFD_MULTIVIEW_CNTL: { VIEWS = 0 } !+ 00000001 VFD_ADD_OFFSET: { VERTEX } + 00000000 SP_VS_OBJ_FIRST_EXEC_OFFSET: 0 - + 00000000 SP_HS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } - + 00000000 SP_GS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = THREAD64 } + + 00000000 SP_HS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 } + + 00000000 SP_GS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 } + 00000000 SP_FS_OBJ_FIRST_EXEC_OFFSET: 0 + 00000000 SP_UNKNOWN_A9A8: 0 !+ 00000005 SP_MODE_CONTROL: { CONSTANT_DEMOTION_ENABLE | UNK2 } @@ -783,7 +783,7 @@ t4 write HLSQ_INVALIDATE_CMD (bb08) HLSQ_INVALIDATE_CMD: { VS_STATE | HS_STATE | DS_STATE | GS_STATE | FS_STATE | GFX_IBO | CS_BINDLESS = 0 | GFX_BINDLESS = 0 } 0000000001054180: 0000: 40bb0801 0000009f t4 write SP_VS_CTRL_REG0 (a800) - SP_VS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 3 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | MERGEDREGS } + SP_VS_CTRL_REG0: { MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 3 | BRANCHSTACK = 0 | 0x80000000 } 0000000001054188: 0000: 40a80001 80100180 t4 write SP_VS_CONFIG (a823) SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 } @@ -863,7 +863,7 @@ t4 write HLSQ_GS_CNTL (b803) HLSQ_GS_CNTL: { CONSTLEN = 0 } 0000000001054208: 0000: 48b80301 00000000 t4 write SP_FS_CTRL_REG0 (a980) - SP_FS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 2 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | VARYING | UNK24 | MERGEDREGS } + SP_FS_CTRL_REG0: { THREADSIZE = THREAD128 | VARYING | UNK24 | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 2 | BRANCHSTACK = 0 } 0000000001054210: 0000: 40a98001 81500100 t4 write SP_FS_CONFIG (ab04) SP_FS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 } @@ -1447,7 +1447,7 @@ t7 opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords) !+ 0000000f VFD_DEST_CNTL[0].INSTR: { WRITEMASK = 0xf | REGID = r0.x } !+ 0000004f VFD_DEST_CNTL[0x1].INSTR: { WRITEMASK = 0xf | REGID = r1.x } !+ 00000081 VFD_DEST_CNTL[0x2].INSTR: { WRITEMASK = 0x1 | REGID = r2.x } -!+ 80100180 SP_VS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 3 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | MERGEDREGS } +!+ 80100180 SP_VS_CTRL_REG0: { MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 3 | BRANCHSTACK = 0 | 0x80000000 } !+ 00000002 SP_VS_PRIMITIVE_CNTL: { OUT = 2 | FLAGS_REGID = r0.x } !+ 0f000f08 SP_VS_OUT[0].REG: { A_REGID = r2.x | A_COMPMASK = 0xf | B_REGID = r0.x | B_COMPMASK = 0xf } !+ 00000400 SP_VS_VPC_DST[0].REG: { OUTLOC0 = 0 | OUTLOC1 = 4 | OUTLOC2 = 0 | OUTLOC3 = 0 } @@ -1480,7 +1480,7 @@ t7 opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords) + 00000000 SP_HS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } + 00000000 SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } + 00000000 SP_GS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } -!+ 81500100 SP_FS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 2 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | VARYING | UNK24 | MERGEDREGS } +!+ 81500100 SP_FS_CTRL_REG0: { THREADSIZE = THREAD128 | VARYING | UNK24 | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 2 | BRANCHSTACK = 0 } !+ 01054080 SP_FS_OBJ_START: 0x1054080 base=1054000, offset=128, size=12288 0000000001054080: 0000: 00002000 47300002 00002001 47300003 00002002 47300004 00002003 47308005 00000000010540a0: 0020: 00000000 03000000 00000000 00000000 00000000 00000000 00000000 00000000 diff --git a/src/freedreno/.gitlab-ci/reference/fd-clouds.log b/src/freedreno/.gitlab-ci/reference/fd-clouds.log index 046a4e8..07bb702 100644 --- a/src/freedreno/.gitlab-ci/reference/fd-clouds.log +++ b/src/freedreno/.gitlab-ci/reference/fd-clouds.log @@ -619,7 +619,7 @@ t4 write SP_FS_OUTPUT_CNTL0 (a98c) SP_FS_OUTPUT_CNTL0: { DEPTH_REGID = r63.x | SAMPMASK_REGID = r63.x | STENCILREF_REGID = r63.x } 0000000001121020: 0000: 40a98c01 fcfcfc00 t4 write SP_VS_CTRL_REG0 (a800) - SP_VS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | MERGEDREGS } + SP_VS_CTRL_REG0: { MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | 0x80000000 } 0000000001121028: 0000: 40a80001 80100080 t4 write SP_VS_INSTRLEN (a824) SP_VS_INSTRLEN: 1 @@ -693,7 +693,7 @@ t4 write HLSQ_FS_CNTL_0 (b980) HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 } 00000000011210b8: 0000: 48b98001 00000001 t4 write SP_FS_CTRL_REG0 (a980) - SP_FS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | UNK24 | MERGEDREGS } + SP_FS_CTRL_REG0: { THREADSIZE = THREAD128 | UNK24 | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 } 00000000011210c0: 0000: 40a98001 81100080 t4 write SP_FS_OBJ_FIRST_EXEC_OFFSET (a982) SP_FS_OBJ_FIRST_EXEC_OFFSET: 0 @@ -1076,7 +1076,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) !+ 00000001 VFD_DECODE[0].STEP_RATE: 1 !+ 0000000f VFD_DEST_CNTL[0].INSTR: { WRITEMASK = 0xf | REGID = r0.x } !+ 00000001 SP_UNKNOWN_A0F8: 0x1 -!+ 80100080 SP_VS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | MERGEDREGS } +!+ 80100080 SP_VS_CTRL_REG0: { MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | 0x80000000 } !+ 00000001 SP_VS_PRIMITIVE_CNTL: { OUT = 1 | FLAGS_REGID = r0.x } !+ 00000f00 SP_VS_OUT[0].REG: { A_REGID = r0.x | A_COMPMASK = 0xf | B_REGID = r0.x | B_COMPMASK = 0 } + 00000000 SP_VS_VPC_DST[0].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 } @@ -1103,7 +1103,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) + 00000000 SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } + 00000000 SP_GS_PRIM_SIZE: 0 + 00000000 SP_GS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } -!+ 81100080 SP_FS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | UNK24 | MERGEDREGS } +!+ 81100080 SP_FS_CTRL_REG0: { THREADSIZE = THREAD128 | UNK24 | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 } + 00000000 SP_FS_OBJ_FIRST_EXEC_OFFSET: 0 + 00000000 SP_SRGB_CNTL: { 0 } !+ 0000000f SP_FS_RENDER_COMPONENTS: { RT0 = 0xf | RT1 = 0 | RT2 = 0 | RT3 = 0 | RT4 = 0 | RT5 = 0 | RT6 = 0 | RT7 = 0 } @@ -1913,7 +1913,7 @@ t4 write SP_FS_OUTPUT_CNTL0 (a98c) SP_FS_OUTPUT_CNTL0: { DEPTH_REGID = r63.x | SAMPMASK_REGID = r63.x | STENCILREF_REGID = r63.x } 0000000001120020: 0000: 40a98c01 fcfcfc00 t4 write SP_VS_CTRL_REG0 (a800) - SP_VS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | MERGEDREGS } + SP_VS_CTRL_REG0: { MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | 0x80000000 } 0000000001120028: 0000: 40a80001 80100080 t4 write SP_VS_INSTRLEN (a824) SP_VS_INSTRLEN: 1 @@ -1987,7 +1987,7 @@ t4 write HLSQ_FS_CNTL_0 (b980) HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 } 00000000011200b8: 0000: 48b98001 00000001 t4 write SP_FS_CTRL_REG0 (a980) - SP_FS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 19 | BRANCHSTACK = 2 | THREADSIZE = THREAD128 | VARYING | UNK24 | MERGEDREGS } + SP_FS_CTRL_REG0: { THREADSIZE = THREAD128 | VARYING | UNK24 | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 19 | BRANCHSTACK = 2 } 00000000011200c0: 0000: 40a98001 81508980 t4 write SP_FS_OBJ_FIRST_EXEC_OFFSET (a982) SP_FS_OBJ_FIRST_EXEC_OFFSET: 0 @@ -5275,7 +5275,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) + c7400000 VFD_DECODE[0].INSTR: { IDX = 0 | OFFSET = 0 | FORMAT = FMT6_32_32_32_FLOAT | SWAP = WZYX | UNK30 | FLOAT } + 00000001 VFD_DECODE[0].STEP_RATE: 1 + 0000000f VFD_DEST_CNTL[0].INSTR: { WRITEMASK = 0xf | REGID = r0.x } - + 80100080 SP_VS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = THREAD128 | MERGEDREGS } + + 80100080 SP_VS_CTRL_REG0: { MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | 0x80000000 } + 00000001 SP_VS_PRIMITIVE_CNTL: { OUT = 1 | FLAGS_REGID = r0.x } + 00000f00 SP_VS_OUT[0].REG: { A_REGID = r0.x | A_COMPMASK = 0xf | B_REGID = r0.x | B_COMPMASK = 0 } + 00000000 SP_VS_VPC_DST[0].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 } @@ -5301,7 +5301,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) + 00000000 SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } + 00000000 SP_GS_PRIM_SIZE: 0 + 00000000 SP_GS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } -!+ 81508980 SP_FS_CTRL_REG0: { THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 19 | BRANCHSTACK = 2 | THREADSIZE = THREAD128 | VARYING | UNK24 | MERGEDREGS } +!+ 81508980 SP_FS_CTRL_REG0: { THREADSIZE = THREAD128 | VARYING | UNK24 | MERGEDREGS | THREADMODE = MULTI | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 19 | BRANCHSTACK = 2 } + 00000000 SP_FS_OBJ_FIRST_EXEC_OFFSET: 0 !+ 01013000 SP_FS_OBJ_START: 0x1013000 base=1013000, offset=0, size=11264 0000000001013000: 0000: 40400000 204cc000 00000000 204cc006 3e99999a 204cc004 20080014 42700008 diff --git a/src/freedreno/computerator/a6xx.c b/src/freedreno/computerator/a6xx.c index 51bc423..7b95b54 100644 --- a/src/freedreno/computerator/a6xx.c +++ b/src/freedreno/computerator/a6xx.c @@ -150,8 +150,7 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel) A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) | A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) | COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) | - A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack) | - COND(v->need_pixlod, A6XX_SP_CS_CTRL_REG0_PIXLODENABLE)); + A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack)); OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1); OUT_RING(ring, 0x41); diff --git a/src/freedreno/registers/adreno/a6xx.xml b/src/freedreno/registers/adreno/a6xx.xml index f1bac71..f43da37 100644 --- a/src/freedreno/registers/adreno/a6xx.xml +++ b/src/freedreno/registers/adreno/a6xx.xml @@ -2674,7 +2674,7 @@ to upconvert to 32b float internally? @@ -2706,26 +2706,6 @@ to upconvert to 32b float internally? - - - - - - - - - - - - - - - @@ -2755,7 +2735,11 @@ to upconvert to 32b float internally? - + + + + + @@ -2855,7 +2839,13 @@ to upconvert to 32b float internally? - + + + + @@ -2909,7 +2901,13 @@ to upconvert to 32b float internally? - + + + + @@ -2955,7 +2953,18 @@ to upconvert to 32b float internally? - + + + + + + + + + + + + @@ -3058,7 +3067,16 @@ to upconvert to 32b float internally? - + + + + + + + + + + diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 4ddd50f..5ffc1f7 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -391,29 +391,63 @@ tu6_emit_xs_config(struct tu_cs *cs, return; } - bool is_fs = xs->type == MESA_SHADER_FRAGMENT; - enum a6xx_threadsize threadsize = THREAD128; - - /* TODO: We probably should be setting the VS threadsize to 64 if paired - * with a GS, and HS + DS threadsize to 64 like freedreno. However this - * should probably come from ir3. - */ - if (xs->type == MESA_SHADER_GEOMETRY) - threadsize = THREAD64; - - tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_ctrl, 1); - tu_cs_emit(cs, - A6XX_SP_VS_CTRL_REG0_THREADSIZE(threadsize) | - A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(xs->info.max_reg + 1) | - A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(xs->info.max_half_reg + 1) | - COND(xs->mergedregs, A6XX_SP_VS_CTRL_REG0_MERGEDREGS) | - A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(xs->branchstack) | - COND(xs->need_pixlod, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE) | - COND(xs->need_fine_derivatives, A6XX_SP_VS_CTRL_REG0_DIFF_FINE) | - /* only fragment shader sets VARYING bit */ - COND(xs->total_in && is_fs, A6XX_SP_FS_CTRL_REG0_VARYING) | - /* unknown bit, seems unnecessary */ - COND(is_fs, 0x1000000)); + switch (stage) { + case MESA_SHADER_VERTEX: + tu_cs_emit_regs(cs, A6XX_SP_VS_CTRL_REG0( + .fullregfootprint = xs->info.max_reg + 1, + .halfregfootprint = xs->info.max_half_reg + 1, + .branchstack = xs->branchstack, + .mergedregs = xs->mergedregs, + )); + break; + case MESA_SHADER_TESS_CTRL: + tu_cs_emit_regs(cs, A6XX_SP_HS_CTRL_REG0( + .fullregfootprint = xs->info.max_reg + 1, + .halfregfootprint = xs->info.max_half_reg + 1, + .branchstack = xs->branchstack, + )); + break; + case MESA_SHADER_TESS_EVAL: + tu_cs_emit_regs(cs, A6XX_SP_DS_CTRL_REG0( + .fullregfootprint = xs->info.max_reg + 1, + .halfregfootprint = xs->info.max_half_reg + 1, + .branchstack = xs->branchstack, + .mergedregs = xs->mergedregs, + )); + break; + case MESA_SHADER_GEOMETRY: + tu_cs_emit_regs(cs, A6XX_SP_GS_CTRL_REG0( + .fullregfootprint = xs->info.max_reg + 1, + .halfregfootprint = xs->info.max_half_reg + 1, + .branchstack = xs->branchstack, + )); + break; + case MESA_SHADER_FRAGMENT: + tu_cs_emit_regs(cs, A6XX_SP_FS_CTRL_REG0( + .fullregfootprint = xs->info.max_reg + 1, + .halfregfootprint = xs->info.max_half_reg + 1, + .branchstack = xs->branchstack, + .mergedregs = xs->mergedregs, + .threadsize = THREAD128, + .pixlodenable = xs->need_pixlod, + .diff_fine = xs->need_fine_derivatives, + .varying = xs->total_in != 0, + /* unknown bit, seems unnecessary */ + .unk24 = true, + )); + break; + case MESA_SHADER_COMPUTE: + tu_cs_emit_regs(cs, A6XX_SP_CS_CTRL_REG0( + .fullregfootprint = xs->info.max_reg + 1, + .halfregfootprint = xs->info.max_half_reg + 1, + .branchstack = xs->branchstack, + .mergedregs = xs->mergedregs, + .threadsize = THREAD128, + )); + break; + default: + unreachable("bad shader stage"); + } tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_config, 2); tu_cs_emit(cs, A6XX_SP_VS_CONFIG_ENABLED | diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c index ff1058e..652e239 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c @@ -74,8 +74,7 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) | A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) | COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) | - A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack) | - COND(v->need_pixlod, A6XX_SP_CS_CTRL_REG0_PIXLODENABLE)); + A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack)); uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1); OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index f9c4009..b54eee8 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -450,20 +450,12 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, COND(fs_has_dual_src_color, A6XX_SP_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE)); - enum a6xx_threadsize vssz; - if (ds || hs) { - vssz = THREAD64; - } else { - vssz = THREAD128; - } - OUT_PKT4(ring, REG_A6XX_SP_VS_CTRL_REG0, 1); - OUT_RING(ring, A6XX_SP_VS_CTRL_REG0_THREADSIZE(vssz) | + OUT_RING(ring, A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vs->info.max_reg + 1) | A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vs->info.max_half_reg + 1) | COND(vs->mergedregs, A6XX_SP_VS_CTRL_REG0_MERGEDREGS) | - A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(vs->branchstack) | - COND(vs->need_pixlod, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE)); + A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(vs->branchstack)); fd6_emit_shader(ctx, ring, vs); fd6_emit_immediates(ctx->screen, vs, ring); @@ -577,25 +569,23 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, } if (hs) { + assert(vs->mergedregs == hs->mergedregs); OUT_PKT4(ring, REG_A6XX_SP_HS_CTRL_REG0, 1); - OUT_RING(ring, A6XX_SP_HS_CTRL_REG0_THREADSIZE(THREAD64) | + OUT_RING(ring, A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(hs->info.max_reg + 1) | A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT(hs->info.max_half_reg + 1) | - COND(hs->mergedregs, A6XX_SP_HS_CTRL_REG0_MERGEDREGS) | - A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(hs->branchstack) | - COND(hs->need_pixlod, A6XX_SP_HS_CTRL_REG0_PIXLODENABLE)); + A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(hs->branchstack)); fd6_emit_shader(ctx, ring, hs); fd6_emit_immediates(ctx->screen, hs, ring); fd6_emit_link_map(ctx->screen, vs, hs, ring); OUT_PKT4(ring, REG_A6XX_SP_DS_CTRL_REG0, 1); - OUT_RING(ring, A6XX_SP_DS_CTRL_REG0_THREADSIZE(THREAD64) | + OUT_RING(ring, A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(ds->info.max_reg + 1) | A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT(ds->info.max_half_reg + 1) | COND(ds->mergedregs, A6XX_SP_DS_CTRL_REG0_MERGEDREGS) | - A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(ds->branchstack) | - COND(ds->need_pixlod, A6XX_SP_DS_CTRL_REG0_PIXLODENABLE)); + A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(ds->branchstack)); fd6_emit_shader(ctx, ring, ds); fd6_emit_immediates(ctx->screen, ds, ring); @@ -804,13 +794,12 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, A6XX_VPC_VS_PACK_STRIDE_IN_VPC(l.max_loc)); if (gs) { + assert(gs->mergedregs == (ds ? ds->mergedregs : vs->mergedregs)); OUT_PKT4(ring, REG_A6XX_SP_GS_CTRL_REG0, 1); - OUT_RING(ring, A6XX_SP_GS_CTRL_REG0_THREADSIZE(THREAD64) | + OUT_RING(ring, A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT(gs->info.max_reg + 1) | A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT(gs->info.max_half_reg + 1) | - COND(gs->mergedregs, A6XX_SP_GS_CTRL_REG0_MERGEDREGS) | - A6XX_SP_GS_CTRL_REG0_BRANCHSTACK(gs->branchstack) | - COND(gs->need_pixlod, A6XX_SP_GS_CTRL_REG0_PIXLODENABLE)); + A6XX_SP_GS_CTRL_REG0_BRANCHSTACK(gs->branchstack)); fd6_emit_shader(ctx, ring, gs); fd6_emit_immediates(ctx->screen, gs, ring); -- 2.7.4