freedreno/a6xx: Document private memory registers
author: Connor Abbott <cwabbott0@gmail.com>
Tue, 27 Oct 2020 15:12:27 +0000 (16:12 +0100)
committer: Connor Abbott <cwabbott0@gmail.com>
Thu, 19 Nov 2020 16:55:03 +0000 (17:55 +0100)
They seem to be broadly similar to the a3xx ones, albeit with some
things shuffled around and with different units, and the extra layout
mode bits.

We also document the FIRST_EXEC_OFFSET registers, so that we can start
properly setting them all to 0 in freedreno and turnip in later commits.
I discovered the compute one when playing with function support in the
blob CL driver, and added the other registers via analogy (the blob
Vulkan driver sets FIRST_EXEC_OFFSET and the shader VA together in one
packet for all stages, so it seems to really be in the same place for
all stages).

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7386>

src/freedreno/.gitlab-ci/reference/crash.log
src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log
src/freedreno/.gitlab-ci/reference/fd-clouds.log
src/freedreno/registers/adreno/a6xx.xml
src/freedreno/vulkan/tu_cmd_buffer.c
src/gallium/drivers/freedreno/a6xx/fd6_emit.c
src/gallium/drivers/freedreno/a6xx/fd6_program.c

index 502fa64..76ee876 100644 (file)
@@ -7962,26 +7962,26 @@ clusters:
        00000000        SP_VS_VPC_DST[0x5].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
        00000000        SP_VS_VPC_DST[0x6].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
        00000000        SP_VS_VPC_DST[0x7].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
-       00000000        SP_UNKNOWN_A81B: 0
+       00000000        SP_VS_OBJ_FIRST_EXEC_OFFSET: 0
        8e5d7d37        SP_VS_OBJ_START_LO: 0x8e5d7d37
        0001fcd5        SP_VS_OBJ_START_HI: 0x1fcd5
-       00000000        0xa81e: 00000000
-       00000000        0xa81f: 00000000
-       00000000        0xa820: 00000000
-       00000000        0xa821: 00000000
+       00000000        SP_VS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+       00000000        SP_VS_PVT_MEM_ADDR: 0
+       00000000        SP_VS_PVT_MEM_ADDR+0x1: 0
+       00000000        SP_VS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
        00000080        SP_VS_TEX_COUNT: 128
        00000100        SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
        00000000        SP_VS_INSTRLEN: 0
        00000000        SP_HS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = TWO_QUADS }
        00000000        SP_HS_UNKNOWN_A831: 0
        00000000        0xa832: 00000000
-       00000000        SP_HS_UNKNOWN_A833: 0
+       00000000        SP_HS_OBJ_FIRST_EXEC_OFFSET: 0
        780a8ca5        SP_HS_OBJ_START_LO: 0x780a8ca5
        0001aad2        SP_HS_OBJ_START_HI: 0x1aad2
-       00000000        0xa836: 00000000
-       00000000        0xa837: 00000000
-       00000000        0xa838: 00000000
-       00000000        0xa839: 00000000
+       00000000        SP_HS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+       00000000        SP_HS_PVT_MEM_ADDR: 0
+       00000000        SP_HS_PVT_MEM_ADDR+0x1: 0
+       00000000        SP_HS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
        00000080        SP_HS_TEX_COUNT: 128
        00000000        SP_HS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
        00000000        SP_HS_INSTRLEN: 0
@@ -8012,13 +8012,13 @@ clusters:
        00000000        SP_DS_VPC_DST[0x5].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
        00000000        SP_DS_VPC_DST[0x6].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
        00000000        SP_DS_VPC_DST[0x7].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
-       00000000        SP_DS_UNKNOWN_A85B: 0
+       00000000        SP_DS_OBJ_FIRST_EXEC_OFFSET: 0
        7abf500d        SP_DS_OBJ_START_LO: 0x7abf500d
        00017e52        SP_DS_OBJ_START_HI: 0x17e52
-       00000000        0xa85e: 00000000
-       00000000        0xa85f: 00000000
-       00000000        0xa860: 00000000
-       00000000        0xa861: 00000000
+       00000000        SP_DS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+       00000000        SP_DS_PVT_MEM_ADDR: 0
+       00000000        SP_DS_PVT_MEM_ADDR+0x1: 0
+       00000000        SP_DS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
        00000080        SP_DS_TEX_COUNT: 128
        00000000        SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
        00000000        SP_DS_INSTRLEN: 0
@@ -8050,13 +8050,13 @@ clusters:
        00000000        SP_GS_VPC_DST[0x5].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
        00000000        SP_GS_VPC_DST[0x6].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
        00000000        SP_GS_VPC_DST[0x7].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
-       00000000        0xa88c: 00000000
+       00000000        SP_GS_OBJ_FIRST_EXEC_OFFSET: 0
        14e2046b        SP_GS_OBJ_START_LO: 0x14e2046b
        00004c8f        SP_GS_OBJ_START_HI: 0x4c8f
-       00000000        0xa88f: 00000000
-       00000000        0xa890: 00000000
-       00000000        0xa891: 00000000
-       00000000        0xa892: 00000000
+       00000000        SP_GS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+       00000000        SP_GS_PVT_MEM_ADDR: 0
+       00000000        SP_GS_PVT_MEM_ADDR+0x1: 0
+       00000000        SP_GS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
        00000080        SP_GS_TEX_COUNT: 128
        00000100        SP_GS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
        00000000        SP_GS_INSTRLEN: 0
@@ -8108,26 +8108,26 @@ clusters:
        00000000        SP_VS_VPC_DST[0x5].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
        00000000        SP_VS_VPC_DST[0x6].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
        00000000        SP_VS_VPC_DST[0x7].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
-       00000000        SP_UNKNOWN_A81B: 0
+       00000000        SP_VS_OBJ_FIRST_EXEC_OFFSET: 0
        8e5d7d37        SP_VS_OBJ_START_LO: 0x8e5d7d37
        0001fcd5        SP_VS_OBJ_START_HI: 0x1fcd5
-       00000000        0xa81e: 00000000
-       00000000        0xa81f: 00000000
-       00000000        0xa820: 00000000
-       00000000        0xa821: 00000000
+       00000000        SP_VS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+       00000000        SP_VS_PVT_MEM_ADDR: 0
+       00000000        SP_VS_PVT_MEM_ADDR+0x1: 0
+       00000000        SP_VS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
        00000080        SP_VS_TEX_COUNT: 128
        00000100        SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
        00000000        SP_VS_INSTRLEN: 0
        00000000        SP_HS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = TWO_QUADS }
        00000000        SP_HS_UNKNOWN_A831: 0
        00000000        0xa832: 00000000
-       00000000        SP_HS_UNKNOWN_A833: 0
+       00000000        SP_HS_OBJ_FIRST_EXEC_OFFSET: 0
        780a8ca5        SP_HS_OBJ_START_LO: 0x780a8ca5
        0001aad2        SP_HS_OBJ_START_HI: 0x1aad2
-       00000000        0xa836: 00000000
-       00000000        0xa837: 00000000
-       00000000        0xa838: 00000000
-       00000000        0xa839: 00000000
+       00000000        SP_HS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+       00000000        SP_HS_PVT_MEM_ADDR: 0
+       00000000        SP_HS_PVT_MEM_ADDR+0x1: 0
+       00000000        SP_HS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
        00000080        SP_HS_TEX_COUNT: 128
        00000000        SP_HS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
        00000000        SP_HS_INSTRLEN: 0
@@ -8158,13 +8158,13 @@ clusters:
        00000000        SP_DS_VPC_DST[0x5].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
        00000000        SP_DS_VPC_DST[0x6].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
        00000000        SP_DS_VPC_DST[0x7].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
-       00000000        SP_DS_UNKNOWN_A85B: 0
+       00000000        SP_DS_OBJ_FIRST_EXEC_OFFSET: 0
        7abf500d        SP_DS_OBJ_START_LO: 0x7abf500d
        00017e52        SP_DS_OBJ_START_HI: 0x17e52
-       00000000        0xa85e: 00000000
-       00000000        0xa85f: 00000000
-       00000000        0xa860: 00000000
-       00000000        0xa861: 00000000
+       00000000        SP_DS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+       00000000        SP_DS_PVT_MEM_ADDR: 0
+       00000000        SP_DS_PVT_MEM_ADDR+0x1: 0
+       00000000        SP_DS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
        00000080        SP_DS_TEX_COUNT: 128
        00000000        SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
        00000000        SP_DS_INSTRLEN: 0
@@ -8196,13 +8196,13 @@ clusters:
        00000000        SP_GS_VPC_DST[0x5].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
        00000000        SP_GS_VPC_DST[0x6].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
        00000000        SP_GS_VPC_DST[0x7].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
-       00000000        0xa88c: 00000000
+       00000000        SP_GS_OBJ_FIRST_EXEC_OFFSET: 0
        14e2046b        SP_GS_OBJ_START_LO: 0x14e2046b
        00004c8f        SP_GS_OBJ_START_HI: 0x4c8f
-       00000000        0xa88f: 00000000
-       00000000        0xa890: 00000000
-       00000000        0xa891: 00000000
-       00000000        0xa892: 00000000
+       00000000        SP_GS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+       00000000        SP_GS_PVT_MEM_ADDR: 0
+       00000000        SP_GS_PVT_MEM_ADDR+0x1: 0
+       00000000        SP_GS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
        00000080        SP_GS_TEX_COUNT: 128
        00000100        SP_GS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
        00000000        SP_GS_INSTRLEN: 0
@@ -8396,13 +8396,13 @@ clusters:
     - context: 0
        05100000        SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = FOUR_QUADS | PIXLODENABLE | 0x1000000 }
        00000000        SP_FS_BRANCH_COND: 0
-       00000000        SP_UNKNOWN_A982: 0
+       00000000        SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
        4bdb43d8        SP_FS_OBJ_START_LO: 0x4bdb43d8
        0001af86        SP_FS_OBJ_START_HI: 0x1af86
-       00000000        0xa985: 00000000
-       00000000        0xa986: 00000000
-       00000000        0xa987: 00000000
-       00000000        0xa988: 00000000
+       00000000        SP_FS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+       00000000        SP_FS_PVT_MEM_ADDR: 0
+       00000000        SP_FS_PVT_MEM_ADDR+0x1: 0
+       00000000        SP_FS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
        00000100        SP_BLEND_CNTL: { UNK8 }
        00000000        SP_SRGB_CNTL: { 0 }
        00000000        SP_FS_RENDER_COMPONENTS: { RT0 = 0 | RT1 = 0 | RT2 = 0 | RT3 = 0 | RT4 = 0 | RT5 = 0 | RT6 = 0 | RT7 = 0 }
@@ -8438,13 +8438,13 @@ clusters:
        00421800        SP_CS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 | THREADSIZE = TWO_QUADS | VARYING }
        0000001f        SP_CS_UNKNOWN_A9B1: 31
        00000000        0xa9b2: 00000000
-       00000000        SP_CS_UNKNOWN_A9B3: 0
+       00000000        SP_CS_OBJ_FIRST_EXEC_OFFSET: 0
        8c415420        SP_CS_OBJ_START_LO: 0x8c415420
        00000000        SP_CS_OBJ_START_HI: 0
-       00000000        0xa9b6: 00000000
-       00000000        0xa9b7: 00000000
-       00000000        0xa9b8: 00000000
-       00000000        0xa9b9: 00000000
+       00000000        SP_CS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+       00000000        SP_CS_PVT_MEM_ADDR: 0
+       00000000        SP_CS_PVT_MEM_ADDR+0x1: 0
+       00000000        SP_CS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
        00000080        SP_CS_TEX_COUNT: 128
        00200100        SP_CS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 16 | NIBO = 0 }
        00000004        SP_CS_INSTRLEN: 4
@@ -8478,13 +8478,13 @@ clusters:
     - context: 1
        05100000        SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = FOUR_QUADS | PIXLODENABLE | 0x1000000 }
        00000000        SP_FS_BRANCH_COND: 0
-       00000000        SP_UNKNOWN_A982: 0
+       00000000        SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
        4bdb43d8        SP_FS_OBJ_START_LO: 0x4bdb43d8
        0001af86        SP_FS_OBJ_START_HI: 0x1af86
-       00000000        0xa985: 00000000
-       00000000        0xa986: 00000000
-       00000000        0xa987: 00000000
-       00000000        0xa988: 00000000
+       00000000        SP_FS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+       00000000        SP_FS_PVT_MEM_ADDR: 0
+       00000000        SP_FS_PVT_MEM_ADDR+0x1: 0
+       00000000        SP_FS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
        00000100        SP_BLEND_CNTL: { UNK8 }
        00000000        SP_SRGB_CNTL: { 0 }
        00000000        SP_FS_RENDER_COMPONENTS: { RT0 = 0 | RT1 = 0 | RT2 = 0 | RT3 = 0 | RT4 = 0 | RT5 = 0 | RT6 = 0 | RT7 = 0 }
@@ -8520,13 +8520,13 @@ clusters:
        00421800        SP_CS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 | THREADSIZE = TWO_QUADS | VARYING }
        0000001f        SP_CS_UNKNOWN_A9B1: 31
        00000000        0xa9b2: 00000000
-       00000000        SP_CS_UNKNOWN_A9B3: 0
+       00000000        SP_CS_OBJ_FIRST_EXEC_OFFSET: 0
        8c415420        SP_CS_OBJ_START_LO: 0x8c415420
        00000000        SP_CS_OBJ_START_HI: 0
-       00000000        0xa9b6: 00000000
-       00000000        0xa9b7: 00000000
-       00000000        0xa9b8: 00000000
-       00000000        0xa9b9: 00000000
+       00000000        SP_CS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+       00000000        SP_CS_PVT_MEM_ADDR: 0
+       00000000        SP_CS_PVT_MEM_ADDR+0x1: 0
+       00000000        SP_CS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
        00000080        SP_CS_TEX_COUNT: 128
        00200100        SP_CS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 16 | NIBO = 0 }
        00000004        SP_CS_INSTRLEN: 4
index 7bbaa5d..3f56d8a 100644 (file)
@@ -69,8 +69,8 @@ t4            write UCHE_CLIENT_PF (0e19)
 t4             write RB_UNKNOWN_8E01 (8e01)
                        RB_UNKNOWN_8E01: 0
 00000000010580a4:              0000: 408e0101 00000000
-t4             write SP_UNKNOWN_A982 (a982)
-                       SP_UNKNOWN_A982: 0
+t4             write SP_FS_OBJ_FIRST_EXEC_OFFSET (a982)
+                       SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
 00000000010580ac:              0000: 48a98201 00000000
 t4             write SP_UNKNOWN_A9A8 (a9a8)
                        SP_UNKNOWN_A9A8: 0
@@ -147,8 +147,8 @@ t4          write PC_PRIMITIVE_CNTL_6 (9b06)
 t4             write PC_MULTIVIEW_CNTL (9b07)
                        PC_MULTIVIEW_CNTL: { VIEWS = 0 }
 0000000001058174:              0000: 489b0701 00000000
-t4             write SP_UNKNOWN_A81B (a81b)
-                       SP_UNKNOWN_A81B: 0
+t4             write SP_VS_OBJ_FIRST_EXEC_OFFSET (a81b)
+                       SP_VS_OBJ_FIRST_EXEC_OFFSET: 0
 000000000105817c:              0000: 40a81b01 00000000
 t4             write SP_UNKNOWN_B183 (b183)
                        SP_UNKNOWN_B183: 0
@@ -340,10 +340,10 @@ t7                opcode: CP_BLIT (2c) (2 dwords)
  +     00000000                VFD_MODE_CNTL: { 0 }
  +     00000000                VFD_MULTIVIEW_CNTL: { VIEWS = 0 }
 !+     00000001                VFD_ADD_OFFSET: { VERTEX }
- +     00000000                SP_UNKNOWN_A81B: 0
+ +     00000000                SP_VS_OBJ_FIRST_EXEC_OFFSET: 0
  +     00000000                SP_HS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = TWO_QUADS }
  +     00000000                SP_GS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = TWO_QUADS }
- +     00000000                SP_UNKNOWN_A982: 0
+ +     00000000                SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
  +     00000000                SP_UNKNOWN_A9A8: 0
 !+     00000005                SP_MODE_CONTROL: { CONSTANT_DEMOTION_ENABLE | 0x4 }
  +     00000000                SP_IBO_COUNT: 0
index e2f0c7c..24fdc66 100644 (file)
@@ -135,8 +135,8 @@ t4          write PC_RASTER_CNTL (9980)
 t4             write PC_MULTIVIEW_CNTL (9b07)
                        PC_MULTIVIEW_CNTL: { VIEWS = 0 }
 0000000001d91154:              0000: 489b0701 00000000
-t4             write SP_UNKNOWN_A81B (a81b)
-                       SP_UNKNOWN_A81B: 0
+t4             write SP_VS_OBJ_FIRST_EXEC_OFFSET (a81b)
+                       SP_VS_OBJ_FIRST_EXEC_OFFSET: 0
 0000000001d9115c:              0000: 40a81b01 00000000
 t4             write SP_UNKNOWN_B183 (b183)
                        SP_UNKNOWN_B183: 0
@@ -603,8 +603,8 @@ t4                                  write SP_IBO_COUNT (ab20)
 0000000001121120:                              0120: 000000fc 000000fc 40930101 00ff0004 409b0601 00000000 40a87101 00000000
 0000000001121140:                              0140: 48910101 00ffff00 48910701 00000000 40a00186 fcfcfcfc 0000fcfc fcfcfcfc
 0000000001121160:                              0160: 000000fc 0000fcfc 00000000 40887001 00000000 48809401 00000000
-t4                                     write SP_HS_UNKNOWN_A833 (a833)
-                                               SP_HS_UNKNOWN_A833: 0
+t4                                     write SP_HS_OBJ_FIRST_EXEC_OFFSET (a833)
+                                               SP_HS_OBJ_FIRST_EXEC_OFFSET: 0
 0000000001121000:                                      0000: 40a83301 00000000
 t4                                     write SP_FS_PREFETCH_CNTL (a99e)
                                                SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | 0x7000 }
@@ -709,8 +709,8 @@ t4                                  write HLSQ_UNKNOWN_B980 (b980)
 t4                                     write SP_FS_CTRL_REG0 (a980)
                                                SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = FOUR_QUADS | MERGEDREGS | 0x1000000 }
 00000000011210c0:                                      0000: 40a98001 81100080
-t4                                     write SP_UNKNOWN_A982 (a982)
-                                               SP_UNKNOWN_A982: 0
+t4                                     write SP_FS_OBJ_FIRST_EXEC_OFFSET (a982)
+                                               SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
 00000000011210c8:                                      0000: 48a98201 00000000
 t4                                     write VPC_VS_LAYER_CNTL (9104)
                                                VPC_VS_LAYER_CNTL: { LAYERLOC = 255 | VIEWLOC = 255 }
@@ -1094,7 +1094,7 @@ t7                        opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
 !+     00000001                        SP_VS_PRIMITIVE_CNTL: { OUT = 1 }
 !+     00000f00                        SP_VS_OUT[0].REG: { A_REGID = r0.x | A_COMPMASK = 0xf | B_REGID = r0.x | B_COMPMASK = 0 }
  +     00000000                        SP_VS_VPC_DST[0].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
- +     00000000                        SP_UNKNOWN_A81B: 0
+ +     00000000                        SP_VS_OBJ_FIRST_EXEC_OFFSET: 0
 !+     01011000                        SP_VS_OBJ_START_LO: 0x1011000           base=1011000, offset=0, size=128
  +     00000000                        SP_VS_OBJ_START_HI: 0           base=1011000, offset=0, size=128
 0000000001011000:                              0000: 00000000 03000000 00000000 00000000 00000000 00000000 00000000 00000000
@@ -1119,13 +1119,13 @@ t7                      opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
 !+     00000100                        SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
 !+     00000001                        SP_VS_INSTRLEN: 1
  +     00000000                        SP_HS_UNKNOWN_A831: 0
- +     00000000                        SP_HS_UNKNOWN_A833: 0
+ +     00000000                        SP_HS_OBJ_FIRST_EXEC_OFFSET: 0
  +     00000000                        SP_HS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
  +     00000000                        SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
  +     00000000                        SP_GS_PRIM_SIZE: 0
  +     00000000                        SP_GS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
 !+     81100080                        SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = FOUR_QUADS | MERGEDREGS | 0x1000000 }
- +     00000000                        SP_UNKNOWN_A982: 0
+ +     00000000                        SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
  +     00000000                        SP_SRGB_CNTL: { 0 }
 !+     0000000f                        SP_FS_RENDER_COMPONENTS: { RT0 = 0xf | RT1 = 0 | RT2 = 0 | RT3 = 0 | RT4 = 0 | RT5 = 0 | RT6 = 0 | RT7 = 0 }
 !+     fcfcfc00                        SP_FS_OUTPUT_CNTL0: { DEPTH_REGID = r63.x | SAMPMASK_REGID = r63.x | STENCILREF_REGID = r63.x }
@@ -1918,8 +1918,8 @@ t4                                        write SP_IBO_COUNT (ab20)
 00000000011202a0:                              02a0: 3dd70a3e 3d3851ec 40d9999a 3d4ac083 3ba3d70a 3de147ae 358637bd 33d6bf95
 00000000011202c0:                              02c0: 3f0ccccd 41800000 45070000 44b40000 3df5c28f 3f333333 3f266666 3f7851ec
 00000000011202e0:                              02e0: 3f19999a 3f666666 3f7d70a4 40100000 00000000 3db851ec 00000000 07ee25f4
-t4                                     write SP_HS_UNKNOWN_A833 (a833)
-                                               SP_HS_UNKNOWN_A833: 0
+t4                                     write SP_HS_OBJ_FIRST_EXEC_OFFSET (a833)
+                                               SP_HS_OBJ_FIRST_EXEC_OFFSET: 0
 0000000001120000:                                      0000: 40a83301 00000000
 t4                                     write SP_FS_PREFETCH_CNTL (a99e)
                                                SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | 0x7000 }
@@ -2024,8 +2024,8 @@ t4                                        write HLSQ_UNKNOWN_B980 (b980)
 t4                                     write SP_FS_CTRL_REG0 (a980)
                                                SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 19 | BRANCHSTACK = 2 | THREADSIZE = FOUR_QUADS | VARYING | MERGEDREGS | 0x1000000 }
 00000000011200c0:                                      0000: 40a98001 81508980
-t4                                     write SP_UNKNOWN_A982 (a982)
-                                               SP_UNKNOWN_A982: 0
+t4                                     write SP_FS_OBJ_FIRST_EXEC_OFFSET (a982)
+                                               SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
 00000000011200c8:                                      0000: 48a98201 00000000
 t4                                     write VPC_VS_LAYER_CNTL (9104)
                                                VPC_VS_LAYER_CNTL: { LAYERLOC = 255 | VIEWLOC = 255 }
@@ -5344,13 +5344,13 @@ t7                      opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
  +     00000100                        SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
  +     00000001                        SP_VS_INSTRLEN: 1
  +     00000000                        SP_HS_UNKNOWN_A831: 0
- +     00000000                        SP_HS_UNKNOWN_A833: 0
+ +     00000000                        SP_HS_OBJ_FIRST_EXEC_OFFSET: 0
  +     00000000                        SP_HS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
  +     00000000                        SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
  +     00000000                        SP_GS_PRIM_SIZE: 0
  +     00000000                        SP_GS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
 !+     81508980                        SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 19 | BRANCHSTACK = 2 | THREADSIZE = FOUR_QUADS | VARYING | MERGEDREGS | 0x1000000 }
- +     00000000                        SP_UNKNOWN_A982: 0
+ +     00000000                        SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
 !+     01013000                        SP_FS_OBJ_START_LO: 0x1013000           base=1013000, offset=0, size=11264
  +     00000000                        SP_FS_OBJ_START_HI: 0           base=1013000, offset=0, size=11264
 0000000001013000:                              0000: 40400000 204cc000 00000000 204cc006 3e99999a 204cc004 20080014 42700008
index b15d66c..082fd72 100644 (file)
@@ -3148,21 +3148,90 @@ to upconvert to 32b float internally?
                </reg32>
        </array>
 
-       <reg32 offset="0xa81b" name="SP_UNKNOWN_A81B"/>
+       <bitset name="a6xx_sp_xs_pvt_mem_param" inline="yes">
+               <bitfield name="MEMSIZEPERITEM" low="0" high="7" shr="9">
+                       <doc>The size of memory that ldp/stp can address.</doc>
+               </bitfield>
+               <bitfield name="HWSTACKSIZEPERTHREAD" low="24" high="31">
+                        <doc>
+                               Seems to be the same as a3xx. The maximum stack
+                               size in units of 4 calls, so a call depth of 7
+                               would result in a value of 2.
+                               TODO: What's the actual size per call, i.e. the
+                               size of the PC? a3xx docs say it's 16 bits
+                               there, but the length register now takes 28 bits
+                               so it's probably been bumped to 32 bits.
+                        </doc>
+               </bitfield>
+       </bitset>
+
+       <bitset name="a6xx_sp_xs_pvt_mem_size" inline="yes">
+               <bitfield name="TOTALPVTMEMSIZE" low="0" high="17" shr="12"/>
+               <bitfield name="PERWAVEMEMLAYOUT" pos="31" type="boolean">
+                       <doc>
+                               There are four indices used to compute the
+                               private memory location for an access:
+
+                               - stp/ldp offset
+                               - fiber id
+                               - wavefront id (a swizzled version of what "getwid" returns)
+                               - SP ID (the same as what "getspid" returns)
+
+                               The stride for the SP ID is always set by
+                               TOTALPVTMEMSIZE. In the per-wave layout, the
+                               indices are used in this order:
+
+                               - offset % 4 (offset within dword)
+                               - fiber id
+                               - offset / 4
+                               - wavefront id
+                               - SP ID
+
+                               and the stride for the wavefront ID is
+                               MEMSIZEPERITEM, multiplied by 128 (fibers per
+                               wavefront). In the per-fiber layout, the indices
+                               are used in this order:
+
+                               - offset
+                               - fiber id % 4
+                               - wavefront id
+                               - fiber id / 4
+                               - SP ID
+
+                               and the stride for the fiber id/wavefront id
+                               combo is MEMSIZEPERITEM.
+
+                               Note: Accesses of more than 1 dword do not work
+                               with per-fiber layout. The blob will fall back
+                               to per-wave instead.
+                       </doc>
+               </bitfield>
+       </bitset>
+
+       <reg32 offset="0xa81b" name="SP_VS_OBJ_FIRST_EXEC_OFFSET" type="uint"/>
        <reg32 offset="0xa81c" name="SP_VS_OBJ_START_LO"/>
        <reg32 offset="0xa81d" name="SP_VS_OBJ_START_HI"/>
+       <reg32 offset="0xa81e" name="SP_VS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param"/>
+       <reg64 offset="0xa81f" name="SP_VS_PVT_MEM_ADDR" type="waddress" align="32"/>
+       <reg32 offset="0xa821" name="SP_VS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size"/>
        <reg32 offset="0xa822" name="SP_VS_TEX_COUNT" type="uint"/>
        <reg32 offset="0xa823" name="SP_VS_CONFIG" type="a6xx_sp_xs_config"/>
        <reg32 offset="0xa824" name="SP_VS_INSTRLEN" type="uint"/>
+       <reg32 offset="0xa825" name="SP_VS_PVT_MEM_HW_STACK_OFFSET" low="0" high="18" shr="11"/>
 
        <reg32 offset="0xa830" name="SP_HS_CTRL_REG0" type="a6xx_sp_xs_ctrl_reg0"/>
        <reg32 offset="0xa831" name="SP_HS_UNKNOWN_A831"/>
-       <reg32 offset="0xa833" name="SP_HS_UNKNOWN_A833"/>
+
+       <reg32 offset="0xa833" name="SP_HS_OBJ_FIRST_EXEC_OFFSET" type="uint"/>
        <reg32 offset="0xa834" name="SP_HS_OBJ_START_LO"/>
        <reg32 offset="0xa835" name="SP_HS_OBJ_START_HI"/>
+       <reg32 offset="0xa836" name="SP_HS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param"/>
+       <reg64 offset="0xa837" name="SP_HS_PVT_MEM_ADDR" type="waddress" align="32"/>
+       <reg32 offset="0xa839" name="SP_HS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size"/>
        <reg32 offset="0xa83a" name="SP_HS_TEX_COUNT" type="uint"/>
        <reg32 offset="0xa83b" name="SP_HS_CONFIG" type="a6xx_sp_xs_config"/>
        <reg32 offset="0xa83c" name="SP_HS_INSTRLEN" type="uint"/>
+       <reg32 offset="0xa83d" name="SP_HS_PVT_MEM_HW_STACK_OFFSET" low="0" high="18" shr="11"/>
 
        <reg32 offset="0xa840" name="SP_DS_CTRL_REG0" type="a6xx_sp_xs_ctrl_reg0"/>
        <reg32 offset="0xa842" name="SP_DS_PRIMITIVE_CNTL">
@@ -3186,12 +3255,16 @@ to upconvert to 32b float internally?
                </reg32>
        </array>
 
-       <reg32 offset="0xa85b" name="SP_DS_UNKNOWN_A85B"/>
+       <reg32 offset="0xa85b" name="SP_DS_OBJ_FIRST_EXEC_OFFSET" type="uint"/>
        <reg32 offset="0xa85c" name="SP_DS_OBJ_START_LO"/>
        <reg32 offset="0xa85d" name="SP_DS_OBJ_START_HI"/>
+       <reg32 offset="0xa85e" name="SP_DS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param"/>
+       <reg64 offset="0xa85f" name="SP_DS_PVT_MEM_ADDR" type="waddress" align="32"/>
+       <reg32 offset="0xa861" name="SP_DS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size"/>
        <reg32 offset="0xa862" name="SP_DS_TEX_COUNT" type="uint"/>
        <reg32 offset="0xa863" name="SP_DS_CONFIG" type="a6xx_sp_xs_config"/>
        <reg32 offset="0xa864" name="SP_DS_INSTRLEN" type="uint"/>
+       <reg32 offset="0xa865" name="SP_DS_PVT_MEM_HW_STACK_OFFSET" low="0" high="18" shr="11"/>
 
        <reg32 offset="0xa870" name="SP_GS_CTRL_REG0" type="a6xx_sp_xs_ctrl_reg0"/>
        <reg32 offset="0xa871" name="SP_GS_PRIM_SIZE">
@@ -3228,11 +3301,16 @@ to upconvert to 32b float internally?
                </reg32>
        </array>
 
+       <reg32 offset="0xa88c" name="SP_GS_OBJ_FIRST_EXEC_OFFSET" type="uint"/>
        <reg32 offset="0xa88d" name="SP_GS_OBJ_START_LO"/>
        <reg32 offset="0xa88e" name="SP_GS_OBJ_START_HI"/>
+       <reg32 offset="0xa88f" name="SP_GS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param"/>
+       <reg64 offset="0xa890" name="SP_GS_PVT_MEM_ADDR" type="waddress" align="32"/>
+       <reg32 offset="0xa892" name="SP_GS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size"/>
        <reg32 offset="0xa893" name="SP_GS_TEX_COUNT" type="uint"/>
        <reg32 offset="0xa894" name="SP_GS_CONFIG" type="a6xx_sp_xs_config"/>
        <reg32 offset="0xa895" name="SP_GS_INSTRLEN" type="uint"/>
+       <reg32 offset="0xa896" name="SP_GS_PVT_MEM_HW_STACK_OFFSET" low="0" high="18" shr="11"/>
 
        <reg32 offset="0xa8a0" name="SP_VS_TEX_SAMP_LO"/>
        <reg32 offset="0xa8a1" name="SP_VS_TEX_SAMP_HI"/>
@@ -3258,9 +3336,13 @@ to upconvert to 32b float internally?
                bit N corresponds to brac.N
                 -->
        </reg32>
-       <reg32 offset="0xa982" name="SP_UNKNOWN_A982"/>
+
+       <reg32 offset="0xa982" name="SP_FS_OBJ_FIRST_EXEC_OFFSET" type="uint"/>
        <reg32 offset="0xa983" name="SP_FS_OBJ_START_LO"/>
        <reg32 offset="0xa984" name="SP_FS_OBJ_START_HI"/>
+       <reg32 offset="0xa985" name="SP_FS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param"/>
+       <reg64 offset="0xa986" name="SP_FS_PVT_MEM_ADDR" type="waddress" align="32"/>
+       <reg32 offset="0xa988" name="SP_FS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size"/>
 
        <reg32 offset="0xa989" name="SP_BLEND_CNTL">
                <bitfield name="ENABLED" pos="0" type="boolean"/>
@@ -3346,6 +3428,8 @@ to upconvert to 32b float internally?
        <!-- always 0x0 ? -->
        <reg32 offset="0xa9a8" name="SP_UNKNOWN_A9A8"/>
 
+       <reg32 offset="0xa9a9" name="SP_FS_PVT_MEM_HW_STACK_OFFSET" low="0" high="18" shr="11"/>
+
        <!-- set for compute shaders, always 0x41 -->
        <reg32 offset="0xa9b1" name="SP_CS_UNKNOWN_A9B1" type="uint">
                <doc>
@@ -3358,9 +3442,6 @@ to upconvert to 32b float internally?
                <bitfield name="SHARED_SIZE_2K" pos="0" type="uint"/>
        </reg32>
 
-       <!-- set for compute shaders, always 0x0 -->
-       <reg32 offset="0xa9b3" name="SP_CS_UNKNOWN_A9B3" type="uint"/>
-
        <reg32 offset="0xa9ba" name="SP_CS_TEX_COUNT" type="uint"/>
 
        <reg32 offset="0xa9e0" name="SP_FS_TEX_SAMP_LO"/>
@@ -3385,10 +3466,23 @@ to upconvert to 32b float internally?
        </array>
 
        <reg32 offset="0xa9b0" name="SP_CS_CTRL_REG0" type="a6xx_sp_xs_ctrl_reg0"/>
+       <reg32 offset="0xa9b3" name="SP_CS_OBJ_FIRST_EXEC_OFFSET" type="uint"/>
        <reg32 offset="0xa9b4" name="SP_CS_OBJ_START_LO"/>
        <reg32 offset="0xa9b5" name="SP_CS_OBJ_START_HI"/>
+       <reg32 offset="0xa9b6" name="SP_CS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param"/>
+       <reg64 offset="0xa9b7" name="SP_CS_PVT_MEM_ADDR" type="waddress" align="32"/>
+       <reg32 offset="0xa9b9" name="SP_CS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size"/>
        <reg32 offset="0xa9bb" name="SP_CS_CONFIG" type="a6xx_sp_xs_config"/>
        <reg32 offset="0xa9bc" name="SP_CS_INSTRLEN" type="uint"/>
+       <reg32 offset="0xa9bd" name="SP_CS_PVT_MEM_HW_STACK_OFFSET" low="0" high="18" shr="11">
+               <doc>
+                       This seems to be the equivalent of HWSTACKOFFSET in
+                       a3xx. The offset formula isn't affected by
+                       HWSTACKOFFSETPERTHREAD at all, so the HW return address
+                       stack seems to be after all the normal per-SP private
+                       memory.
+               </doc>
+       </reg32>
 
        <!--
        IBO state for compute shader:
index 102932e..71a2b32 100644 (file)
@@ -756,7 +756,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
    tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_UNKNOWN_0E12, 0x3200000);
    tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_CLIENT_PF, 4);
    tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E01, 0x0);
-   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A982, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_SP_FS_OBJ_FIRST_EXEC_OFFSET, 0);
    tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A9A8, 0);
    tu_cs_emit_write_reg(cs, REG_A6XX_SP_MODE_CONTROL,
                         A6XX_SP_MODE_CONTROL_CONSTANT_DEMOTION_ENABLE | 4);
@@ -782,7 +782,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 
    tu_cs_emit_regs(cs, A6XX_VPC_SO_DISABLE(true));
 
-   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A81B, 0);
+   tu_cs_emit_write_reg(cs, REG_A6XX_SP_VS_OBJ_FIRST_EXEC_OFFSET, 0);
 
    tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B183, 0);
 
index 2bdcdb0..bbd9e68 100644 (file)
@@ -1217,7 +1217,7 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
 
        WRITE(REG_A6XX_PC_MULTIVIEW_CNTL, 0);
 
-       WRITE(REG_A6XX_SP_UNKNOWN_A81B, 0);
+       WRITE(REG_A6XX_SP_VS_OBJ_FIRST_EXEC_OFFSET, 0);
 
        WRITE(REG_A6XX_SP_UNKNOWN_B183, 0);
 
index c32ec9d..95dbdd2 100644 (file)
@@ -430,7 +430,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
         * emitted if frag-prog is dirty vs if vert-prog is dirty..
         */
 
-       OUT_PKT4(ring, REG_A6XX_SP_HS_UNKNOWN_A833, 1);
+       OUT_PKT4(ring, REG_A6XX_SP_HS_OBJ_FIRST_EXEC_OFFSET, 1);
        OUT_RING(ring, 0x0);
 
        OUT_PKT4(ring, REG_A6XX_SP_FS_PREFETCH_CNTL, 1 + fs->num_sampler_prefetch);
@@ -723,8 +723,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
                        A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(fs->branchstack) |
                        COND(fs->need_pixlod, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE));
 
-       OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A982, 1);
-       OUT_RING(ring, 0);        /* XXX */
+       OUT_PKT4(ring, REG_A6XX_SP_FS_OBJ_FIRST_EXEC_OFFSET, 1);
+       OUT_RING(ring, 0);
 
        OUT_PKT4(ring, REG_A6XX_VPC_VS_LAYER_CNTL, 1);
        OUT_RING(ring, 0x0000ffff);        /* XXX */