freedreno, turnip: Clarify some RB_CCU_CNTL fields
author Danylo Piliaiev <dpiliaiev@igalia.com>
Mon, 30 Jan 2023 16:08:29 +0000 (17:08 +0100)
committer Marge Bot <emma+marge@anholt.net>
Thu, 13 Jul 2023 18:06:36 +0000 (18:06 +0000)
There is no .gmem field; instead, there is a CCU color cache size field,
which gives the size as a fraction of the depth cache used in direct
rendering.

There is also a GMEM_FAST_CLEAR_DISABLE flag, which is set on a608/a610.

Since these values will stop being the same between models,
make them configurable.

Credits to Connor Abbott for deciphering color cache size meaning.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20991>

14 files changed:
src/freedreno/.gitlab-ci/reference/crash.log
src/freedreno/.gitlab-ci/reference/crash_prefetch.log
src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log
src/freedreno/.gitlab-ci/reference/fd-clouds.log
src/freedreno/.gitlab-ci/reference/prefetch-test.log
src/freedreno/common/freedreno_dev_info.h
src/freedreno/common/freedreno_devices.py
src/freedreno/registers/adreno/a6xx.xml
src/freedreno/vulkan/tu_cmd_buffer.cc
src/freedreno/vulkan/tu_device.cc
src/freedreno/vulkan/tu_pipeline.cc
src/gallium/drivers/freedreno/a6xx/fd6_emit.cc
src/gallium/drivers/freedreno/a6xx/fd6_program.cc
src/gallium/drivers/freedreno/a6xx/fd6_screen.cc

index bc3f72fec722f5b3873913d80954ff263b91685d..0e49ee36951b3007b968f8cf1b09a52c1350535e 100644 (file)
@@ -1529,7 +1529,7 @@ registers:
        00000000        0xa630: 00000000
        00100000        RB_DBG_ECO_CNTL: 0x100000
        00000001        RB_ADDR_MODE_CNTL: ADDR_64B
-       00000000        RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | COLOR_OFFSET = 0 }
+       00000000        RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_FULL | COLOR_OFFSET = 0 }
        00000004        RB_NC_MODE_CNTL: { LOWER_BIT = 2 | UPPER_BIT = 0 }
        00000000        RB_PERFCTR_RB_SEL[0]+0: 00000000
        00000000        RB_PERFCTR_RB_SEL[0x1]+0: 00000000
@@ -7079,7 +7079,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d)
   - cluster-name: CLUSTER_SP_PS
     - context: 0
        00000001        HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 }
-       00000007        HLSQ_CONTROL_1_REG: 0x7
+       00000007        HLSQ_CONTROL_1_REG: { PRIMALLOCTHRESHOLD = 7 }
        fcfcfcfc        HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | CENTERRHW = r63.x }
        fcfcfcfc        HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r63.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x }
        fcfcfcfc        HLSQ_CONTROL_4_REG: { IJ_PERSP_SAMPLE = r63.x | IJ_LINEAR_SAMPLE = r63.x | XYCOORDREGID = r63.x | ZWCOORDREGID = r63.x }
@@ -7106,7 +7106,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d)
        00000000        HLSQ_CS_BINDLESS_BASE[0x4].DESCRIPTOR: { DESC_SIZE = 0 | ADDR = 0 }
     - context: 1
        00000001        HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 }
-       00000007        HLSQ_CONTROL_1_REG: 0x7
+       00000007        HLSQ_CONTROL_1_REG: { PRIMALLOCTHRESHOLD = 7 }
        fcfcfcfc        HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | CENTERRHW = r63.x }
        fcfcfcfc        HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r63.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x }
        fcfcfcfc        HLSQ_CONTROL_4_REG: { IJ_PERSP_SAMPLE = r63.x | IJ_LINEAR_SAMPLE = r63.x | XYCOORDREGID = r63.x | ZWCOORDREGID = r63.x }
index ddc5e36ccbbfc1fa22569501e83b3e06c4ed6b28..250218f81b78c3748da84b968fc153b31e8a1f12 100644 (file)
@@ -1744,7 +1744,7 @@ registers:
        00000000        0xa630: 00000000
        00100000        RB_DBG_ECO_CNTL: 0x100000
        00000001        RB_ADDR_MODE_CNTL: ADDR_64B
-       08000000        RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | COLOR_OFFSET = 0x10000 }
+       08000000        RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_FULL | COLOR_OFFSET = 0x10000 }
        00000002        RB_NC_MODE_CNTL: { LOWER_BIT = 1 | UPPER_BIT = 0 }
        00000000        RB_PERFCTR_RB_SEL[0]+0: 00000000
        00000000        RB_PERFCTR_RB_SEL[0x1]+0: 00000000
@@ -2022,7 +2022,7 @@ got cmdszdw=83
  +     00000000                        RB_2D_SRC_SOLID_C3: 0
 !+     00000001                        RB_UNKNOWN_8E01: 0x1
 !+     00100000                        RB_DBG_ECO_CNTL: 0x100000
-!+     08000000                        RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | COLOR_OFFSET = 0x10000 }
+!+     08000000                        RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_FULL | COLOR_OFFSET = 0x10000 }
  +     00000000                        VPC_UNKNOWN_9210: 0
  +     00000000                        VPC_UNKNOWN_9211: 0
  +     00000000                        VPC_POINT_COORD_INVERT: { 0 }
@@ -2332,7 +2332,7 @@ got cmdszdw=83
  +     00000000                                RB_UNKNOWN_88F0: 0
  +     00000001                                RB_UNKNOWN_8E01: 0x1
  +     00100000                                RB_DBG_ECO_CNTL: 0x100000
- +     08000000                                RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | COLOR_OFFSET = 0x10000 }
+ +     08000000                                RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_FULL | COLOR_OFFSET = 0x10000 }
  +     00000000                                VPC_UNKNOWN_9210: 0
  +     00000000                                VPC_UNKNOWN_9211: 0
  +     00000000                                VPC_POINT_COORD_INVERT: { 0 }
@@ -3000,7 +3000,7 @@ got cmdszdw=83
  +     00000000                                HLSQ_DS_CNTL: { CONSTLEN = 0 }
  +     00000000                                HLSQ_GS_CNTL: { CONSTLEN = 0 }
 !+     00000001                                HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 }
-!+     00000007                                HLSQ_CONTROL_1_REG: 0x7
+!+     00000007                                HLSQ_CONTROL_1_REG: { PRIMALLOCTHRESHOLD = 7 }
 !+     fcfcfcfc                                HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | CENTERRHW = r63.x }
 !+     fcfcfcfc                                HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r63.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x }
 !+     0200fcfc                                HLSQ_CONTROL_4_REG: { IJ_PERSP_SAMPLE = r63.x | IJ_LINEAR_SAMPLE = r63.x | XYCOORDREGID = r0.x | ZWCOORDREGID = r0.z }
@@ -3714,7 +3714,7 @@ got cmdszdw=83
  +     00000000                                HLSQ_DS_CNTL: { CONSTLEN = 0 }
  +     00000000                                HLSQ_GS_CNTL: { CONSTLEN = 0 }
  +     00000001                                HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 }
- +     00000007                                HLSQ_CONTROL_1_REG: 0x7
+ +     00000007                                HLSQ_CONTROL_1_REG: { PRIMALLOCTHRESHOLD = 7 }
  +     fcfcfcfc                                HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | CENTERRHW = r63.x }
  +     fcfcfcfc                                HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r63.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x }
  +     0200fcfc                                HLSQ_CONTROL_4_REG: { IJ_PERSP_SAMPLE = r63.x | IJ_LINEAR_SAMPLE = r63.x | XYCOORDREGID = r0.x | ZWCOORDREGID = r0.z }
@@ -4439,7 +4439,7 @@ got cmdszdw=83
  +     00000000                                HLSQ_DS_CNTL: { CONSTLEN = 0 }
  +     00000000                                HLSQ_GS_CNTL: { CONSTLEN = 0 }
  +     00000001                                HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 }
- +     00000007                                HLSQ_CONTROL_1_REG: 0x7
+ +     00000007                                HLSQ_CONTROL_1_REG: { PRIMALLOCTHRESHOLD = 7 }
  +     fcfcfcfc                                HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | CENTERRHW = r63.x }
  +     fcfcfcfc                                HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r63.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x }
  +     0200fcfc                                HLSQ_CONTROL_4_REG: { IJ_PERSP_SAMPLE = r63.x | IJ_LINEAR_SAMPLE = r63.x | XYCOORDREGID = r0.x | ZWCOORDREGID = r0.z }
@@ -5090,7 +5090,7 @@ ESTIMATED CRASH LOCATION!
  +     00000000                                HLSQ_DS_CNTL: { CONSTLEN = 0 }
  +     00000000                                HLSQ_GS_CNTL: { CONSTLEN = 0 }
  +     00000001                                HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 }
- +     00000007                                HLSQ_CONTROL_1_REG: 0x7
+ +     00000007                                HLSQ_CONTROL_1_REG: { PRIMALLOCTHRESHOLD = 7 }
  +     fcfcfcfc                                HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | CENTERRHW = r63.x }
  +     fcfcfcfc                                HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r63.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x }
  +     0200fcfc                                HLSQ_CONTROL_4_REG: { IJ_PERSP_SAMPLE = r63.x | IJ_LINEAR_SAMPLE = r63.x | XYCOORDREGID = r0.x | ZWCOORDREGID = r0.z }
@@ -5212,7 +5212,7 @@ ESTIMATED CRASH LOCATION!
 !+     00000000                        RB_2D_DST_PITCH: 0
  +     00000001                        RB_UNKNOWN_8E01: 0x1
  +     00100000                        RB_DBG_ECO_CNTL: 0x100000
- +     08000000                        RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | COLOR_OFFSET = 0x10000 }
+ +     08000000                        RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_FULL | COLOR_OFFSET = 0x10000 }
  +     00000000                        VPC_UNKNOWN_9210: 0
  +     00000000                        VPC_UNKNOWN_9211: 0
  +     00000000                        VPC_POINT_COORD_INVERT: { 0 }
@@ -18907,7 +18907,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d)
   - cluster-name: CLUSTER_SP_PS
     - context: 0
        00000001        HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 }
-       00000007        HLSQ_CONTROL_1_REG: 0x7
+       00000007        HLSQ_CONTROL_1_REG: { PRIMALLOCTHRESHOLD = 7 }
        fcfcfcfc        HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | CENTERRHW = r63.x }
        fcfcfcfc        HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r63.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x }
        0200fcfc        HLSQ_CONTROL_4_REG: { IJ_PERSP_SAMPLE = r63.x | IJ_LINEAR_SAMPLE = r63.x | XYCOORDREGID = r0.x | ZWCOORDREGID = r0.z }
@@ -18934,7 +18934,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d)
        00000000        HLSQ_CS_BINDLESS_BASE[0x4].DESCRIPTOR: { DESC_SIZE = 0 | ADDR = 0 }
     - context: 1
        00000001        HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 }
-       00000007        HLSQ_CONTROL_1_REG: 0x7
+       00000007        HLSQ_CONTROL_1_REG: { PRIMALLOCTHRESHOLD = 7 }
        fcfcfcfc        HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | CENTERRHW = r63.x }
        fcfcfcfc        HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r63.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x }
        0200fcfc        HLSQ_CONTROL_4_REG: { IJ_PERSP_SAMPLE = r63.x | IJ_LINEAR_SAMPLE = r63.x | XYCOORDREGID = r0.x | ZWCOORDREGID = r0.z }
index f01df22a01af2f1477f826c6539457e644ddf7ac..d573efcf3f95c1ce1184ba052a3f6b5ca39a24e6 100644 (file)
@@ -12,7 +12,7 @@ cmdstream[0]: 265 dwords
                opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
 0000000001058010:              0000: 70268000
                write RB_CCU_CNTL (8e07)
-                       RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | COLOR_OFFSET = 0x20000 }
+                       RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_FULL | COLOR_OFFSET = 0x20000 }
 0000000001058014:              0000: 408e0701 10000000
                write RB_DBG_ECO_CNTL (8e04)
                        RB_DBG_ECO_CNTL: 0x100000
@@ -310,7 +310,7 @@ cmdstream[0]: 265 dwords
 !+     000000ff                RB_2D_SRC_SOLID_C3: 0xff
  +     00000000                RB_UNKNOWN_8E01: 0
 !+     00100000                RB_DBG_ECO_CNTL: 0x100000
-!+     10000000                RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | COLOR_OFFSET = 0x20000 }
+!+     10000000                RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_FULL | COLOR_OFFSET = 0x20000 }
  +     00000000                VPC_UNKNOWN_9107: { 0 }
  +     00000000                VPC_UNKNOWN_9210: 0
  +     00000000                VPC_UNKNOWN_9211: 0
@@ -384,7 +384,7 @@ cmdstream[0]: 265 dwords
                opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
 000000000105832c:              0000: 70268000
                write RB_CCU_CNTL (8e07)
-                       RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | GMEM | COLOR_OFFSET = 0xf8000 }
+                       RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_QUARTER | COLOR_OFFSET = 0xf8000 }
 0000000001058330:              0000: 408e0701 7c400000
                write VPC_SO_DISABLE (9306)
                        VPC_SO_DISABLE: { 0 }
@@ -485,7 +485,7 @@ cmdstream[0]: 265 dwords
 !+     01012000                        RB_BLIT_FLAG_DST: 0x1012000
 !+     00004001                        RB_BLIT_FLAG_DST_PITCH: { PITCH = 64 | ARRAY_PITCH = 1024 }
 !+     00000003                        RB_BLIT_INFO: { UNK0 | GMEM | CLEAR_MASK = 0 | LAST = 0 | BUFFER_ID = 0 }
-!+     7c400000                        RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | GMEM | COLOR_OFFSET = 0xf8000 }
+!+     7c400000                        RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_QUARTER | COLOR_OFFSET = 0xf8000 }
 !+     00000000                        VPC_SO_DISABLE: { 0 }
  +     00000000                        SP_TP_WINDOW_OFFSET: { X = 0 | Y = 0 }
  +     00000000                        SP_WINDOW_OFFSET: { X = 0 | Y = 0 }
@@ -976,7 +976,7 @@ cmdstream[0]: 265 dwords
                                                SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK6 = 0x1ff }
 0000000001054344:                                      0000: 40a99e01 00007fc0
                                        write HLSQ_CONTROL_1_REG (b982)
-                                               HLSQ_CONTROL_1_REG: 0x7
+                                               HLSQ_CONTROL_1_REG: { PRIMALLOCTHRESHOLD = 7 }
                                                HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | CENTERRHW = r63.x }
                                                HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r0.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x }
                                                HLSQ_CONTROL_4_REG: { IJ_PERSP_SAMPLE = r63.x | IJ_LINEAR_SAMPLE = r63.x | XYCOORDREGID = r63.x | ZWCOORDREGID = r63.x }
@@ -1495,7 +1495,7 @@ cmdstream[0]: 265 dwords
  +     00000000                        HLSQ_DS_CNTL: { CONSTLEN = 0 }
  +     00000000                        HLSQ_GS_CNTL: { CONSTLEN = 0 }
 !+     00000003                        HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 | VARYINGS }
-!+     00000007                        HLSQ_CONTROL_1_REG: 0x7
+!+     00000007                        HLSQ_CONTROL_1_REG: { PRIMALLOCTHRESHOLD = 7 }
 !+     fcfcfcfc                        HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | CENTERRHW = r63.x }
 !+     fcfcfc00                        HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r0.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x }
 !+     fcfcfcfc                        HLSQ_CONTROL_4_REG: { IJ_PERSP_SAMPLE = r63.x | IJ_LINEAR_SAMPLE = r63.x | XYCOORDREGID = r63.x | ZWCOORDREGID = r63.x }
index cfd009a066e23769141d691c73874fb785b72596..c4c7ff0ef38f387626a41c782f1affe577de94e2 100644 (file)
@@ -245,7 +245,7 @@ cmdstream[0]: 1023 dwords
                opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
 0000000001d91278:              0000: 70268000
                write RB_CCU_CNTL (8e07)
-                       RB_CCU_CNTL: { CONCURRENT_RESOLVE | DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | GMEM | COLOR_OFFSET = 0xf8000 }
+                       RB_CCU_CNTL: { CONCURRENT_RESOLVE | DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_QUARTER | COLOR_OFFSET = 0xf8000 }
 0000000001d9127c:              0000: 408e0701 7c400004
                write RB_DEPTH_BUFFER_INFO (8872)
                        RB_DEPTH_BUFFER_INFO: { DEPTH_FORMAT = DEPTH6_NONE }
@@ -675,7 +675,7 @@ cmdstream[0]: 1023 dwords
                                                PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
 0000000001121098:                                      0000: 409b0301 00000000
                                        write HLSQ_CONTROL_1_REG (b982)
-                                               HLSQ_CONTROL_1_REG: 0x7
+                                               HLSQ_CONTROL_1_REG: { PRIMALLOCTHRESHOLD = 7 }
                                                HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | CENTERRHW = r63.x }
                                                HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r63.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x }
                                                HLSQ_CONTROL_4_REG: { IJ_PERSP_SAMPLE = r63.x | IJ_LINEAR_SAMPLE = r63.x | XYCOORDREGID = r63.x | ZWCOORDREGID = r63.x }
@@ -1007,7 +1007,7 @@ cmdstream[0]: 1023 dwords
  +     00000000                        RB_MRT_FLAG_BUFFER[0].PITCH: { PITCH = 0 | ARRAY_PITCH = 0 }
 !+     00000001                        RB_UNKNOWN_8E01: 0x1
  +     00000000                        RB_DBG_ECO_CNTL: 0
-!+     7c400004                        RB_CCU_CNTL: { CONCURRENT_RESOLVE | DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | GMEM | COLOR_OFFSET = 0xf8000 }
+!+     7c400004                        RB_CCU_CNTL: { CONCURRENT_RESOLVE | DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_QUARTER | COLOR_OFFSET = 0xf8000 }
 !+     00ffff00                        VPC_VS_CLIP_CNTL: { CLIP_MASK = 0 | CLIP_DIST_03_LOC = 255 | CLIP_DIST_47_LOC = 255 }
 !+     0000ffff                        VPC_VS_LAYER_CNTL: { LAYERLOC = 255 | VIEWLOC = 255 }
  +     00000000                        VPC_UNKNOWN_9107: { 0 }
@@ -1121,7 +1121,7 @@ cmdstream[0]: 1023 dwords
  +     00000000                        HLSQ_DS_CNTL: { CONSTLEN = 0 }
  +     00000000                        HLSQ_GS_CNTL: { CONSTLEN = 0 }
 !+     00000001                        HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 }
-!+     00000007                        HLSQ_CONTROL_1_REG: 0x7
+!+     00000007                        HLSQ_CONTROL_1_REG: { PRIMALLOCTHRESHOLD = 7 }
 !+     fcfcfcfc                        HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | CENTERRHW = r63.x }
 !+     fcfcfcfc                        HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r63.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x }
 !+     fcfcfcfc                        HLSQ_CONTROL_4_REG: { IJ_PERSP_SAMPLE = r63.x | IJ_LINEAR_SAMPLE = r63.x | XYCOORDREGID = r63.x | ZWCOORDREGID = r63.x }
@@ -1498,7 +1498,7 @@ cmdstream[0]: 1023 dwords
                opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
 0000000001d91938:              0000: 70268000
                write RB_CCU_CNTL (8e07)
-                       RB_CCU_CNTL: { CONCURRENT_RESOLVE | DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | GMEM | COLOR_OFFSET = 0xf8000 }
+                       RB_CCU_CNTL: { CONCURRENT_RESOLVE | DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_QUARTER | COLOR_OFFSET = 0xf8000 }
 0000000001d9193c:              0000: 408e0701 7c400004
                write VPC_SO_DISABLE (9306)
                        VPC_SO_DISABLE: { DISABLE }
@@ -1678,7 +1678,7 @@ cmdstream[0]: 1023 dwords
  +     00000000                        RB_BLIT_CLEAR_COLOR_DW2: 0
  +     00000000                        RB_BLIT_CLEAR_COLOR_DW3: 0
 !+     000000f2                        RB_BLIT_INFO: { GMEM | CLEAR_MASK = 0xf | LAST = 0 | BUFFER_ID = 0 }
- +     7c400004                        RB_CCU_CNTL: { CONCURRENT_RESOLVE | DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | GMEM | COLOR_OFFSET = 0xf8000 }
+ +     7c400004                        RB_CCU_CNTL: { CONCURRENT_RESOLVE | DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_QUARTER | COLOR_OFFSET = 0xf8000 }
 !+     00000001                        VPC_SO_DISABLE: { DISABLE }
  +     00000001                        PC_POWER_CNTL: 0x1
 !+     00000000                        VFD_MODE_CNTL: { RENDER_MODE = RENDERING_PASS }
@@ -1959,7 +1959,7 @@ cmdstream[0]: 1023 dwords
                                                PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
 0000000001120098:                                      0000: 409b0301 00000000
                                        write HLSQ_CONTROL_1_REG (b982)
-                                               HLSQ_CONTROL_1_REG: 0x7
+                                               HLSQ_CONTROL_1_REG: { PRIMALLOCTHRESHOLD = 7 }
                                                HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | CENTERRHW = r63.x }
                                                HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r63.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x }
                                                HLSQ_CONTROL_4_REG: { IJ_PERSP_SAMPLE = r63.x | IJ_LINEAR_SAMPLE = r63.x | XYCOORDREGID = r4.w | ZWCOORDREGID = r5.y }
@@ -6727,7 +6727,7 @@ cmdstream[0]: 1023 dwords
  +     00000000                        HLSQ_DS_CNTL: { CONSTLEN = 0 }
  +     00000000                        HLSQ_GS_CNTL: { CONSTLEN = 0 }
  +     00000001                        HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 }
- +     00000007                        HLSQ_CONTROL_1_REG: 0x7
+ +     00000007                        HLSQ_CONTROL_1_REG: { PRIMALLOCTHRESHOLD = 7 }
  +     fcfcfcfc                        HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | CENTERRHW = r63.x }
  +     fcfcfcfc                        HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r63.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x }
 !+     1513fcfc                        HLSQ_CONTROL_4_REG: { IJ_PERSP_SAMPLE = r63.x | IJ_LINEAR_SAMPLE = r63.x | XYCOORDREGID = r4.w | ZWCOORDREGID = r5.y }
index 33e142de99951a6c81bf87687b70df730fea9e97..28e10b23bb1a0ba8f4b1bd5f21da363fbf1696ae 100644 (file)
@@ -2323,7 +2323,7 @@ registers:
        00000000        0xa630: 00000000
        00000000        RB_DBG_ECO_CNTL: 0
        00000001        RB_ADDR_MODE_CNTL: ADDR_64B
-       08000000        RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | COLOR_OFFSET = 0x10000 }
+       08000000        RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_FULL | COLOR_OFFSET = 0x10000 }
        00000002        RB_NC_MODE_CNTL: { LOWER_BIT = 1 | UPPER_BIT = 0 }
        00000000        RB_PERFCTR_RB_SEL[0]+0: 00000000
        00000000        RB_PERFCTR_RB_SEL[0x1]+0: 00000000
@@ -3108,7 +3108,7 @@ got cmdszdw=438
 !+     00004001                                RB_MRT_FLAG_BUFFER[0].PITCH: { PITCH = 64 | ARRAY_PITCH = 1024 }
 !+     10000ad30                               RB_SAMPLE_COUNT_ADDR: 0x10000ad30
 !+     00000000                                RB_DBG_ECO_CNTL: 0
-!+     08000000                                RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_OFFSET = 0 | COLOR_OFFSET = 0x10000 }
+!+     08000000                                RB_CCU_CNTL: { DEPTH_OFFSET_HI = 0 | COLOR_OFFSET_HI = 0 | DEPTH_CACHE_SIZE = 0 | DEPTH_OFFSET = 0 | COLOR_CACHE_SIZE = CCU_COLOR_CACHE_SIZE_FULL | COLOR_OFFSET = 0x10000 }
 !+     00ffff00                                VPC_VS_CLIP_CNTL: { CLIP_MASK = 0 | CLIP_DIST_03_LOC = 255 | CLIP_DIST_47_LOC = 255 }
 !+     0000ffff                                VPC_VS_LAYER_CNTL: { LAYERLOC = 255 | VIEWLOC = 255 }
  +     00000000                                VPC_UNKNOWN_9107: { 0 }
@@ -3319,7 +3319,7 @@ got cmdszdw=438
  +     00000000                                HLSQ_DS_CNTL: { CONSTLEN = 0 }
  +     00000000                                HLSQ_GS_CNTL: { CONSTLEN = 0 }
 !+     00000003                                HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 | VARYINGS }
-!+     00000007                                HLSQ_CONTROL_1_REG: 0x7
+!+     00000007                                HLSQ_CONTROL_1_REG: { PRIMALLOCTHRESHOLD = 7 }
 !+     fcfcfcfc                                HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | CENTERRHW = r63.x }
 !+     fcfcfc00                                HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r0.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x }
 !+     fcfcfcfc                                HLSQ_CONTROL_4_REG: { IJ_PERSP_SAMPLE = r63.x | IJ_LINEAR_SAMPLE = r63.x | XYCOORDREGID = r63.x | ZWCOORDREGID = r63.x }
@@ -152886,7 +152886,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d)
   - cluster-name: CLUSTER_SP_PS
     - context: 0
        00000003        HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 | VARYINGS }
-       00000007        HLSQ_CONTROL_1_REG: 0x7
+       00000007        HLSQ_CONTROL_1_REG: { PRIMALLOCTHRESHOLD = 7 }
        fcfcfcfc        HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | CENTERRHW = r63.x }
        fcfcfc00        HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r0.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x }
        fcfcfcfc        HLSQ_CONTROL_4_REG: { IJ_PERSP_SAMPLE = r63.x | IJ_LINEAR_SAMPLE = r63.x | XYCOORDREGID = r63.x | ZWCOORDREGID = r63.x }
@@ -152913,7 +152913,7 @@ WARNING: 64b discontinuity (no _LO dword for 890d)
        00000000        HLSQ_CS_BINDLESS_BASE[0x4].DESCRIPTOR: { DESC_SIZE = 0 | ADDR = 0 }
     - context: 1
        00000003        HLSQ_FS_CNTL_0: { THREADSIZE = THREAD128 | VARYINGS }
-       00000007        HLSQ_CONTROL_1_REG: 0x7
+       00000007        HLSQ_CONTROL_1_REG: { PRIMALLOCTHRESHOLD = 7 }
        fcfcfcfc        HLSQ_CONTROL_2_REG: { FACEREGID = r63.x | SAMPLEID = r63.x | SAMPLEMASK = r63.x | CENTERRHW = r63.x }
        fcfcfc00        HLSQ_CONTROL_3_REG: { IJ_PERSP_PIXEL = r0.x | IJ_LINEAR_PIXEL = r63.x | IJ_PERSP_CENTROID = r63.x | IJ_LINEAR_CENTROID = r63.x }
        fcfcfcfc        HLSQ_CONTROL_4_REG: { IJ_PERSP_SAMPLE = r63.x | IJ_LINEAR_SAMPLE = r63.x | XYCOORDREGID = r63.x | ZWCOORDREGID = r63.x }
index fae7ed4662e64160d4c7f1215acb491892725f4f..29ff2bb6ba5584adc944e65cf2116b2bda853ec8 100644 (file)
@@ -153,6 +153,23 @@ struct fd_dev_info {
           * different views.
           */
          bool has_per_view_viewport;
+         bool has_gmem_fast_clear;
+
+         /* Per CCU GMEM amount reserved for each of DEPTH and COLOR caches
+          * in sysmem rendering. */
+         uint32_t sysmem_per_ccu_cache_size;
+         /* Per CCU GMEM amount reserved for color cache used by GMEM resolves
+          * which require color cache (non-BLIT event case).
+          * The size is expressed as a fraction of ccu cache used by sysmem
+          * rendering. If a GMEM resolve requires color cache, the driver needs
+          * to make sure it will not overwrite pixel data in GMEM that is still
+          * needed.
+          */
+         /* see enum a6xx_ccu_color_cache_size */
+         uint32_t gmem_ccu_color_cache_fraction;
+
+         /* Corresponds to HLSQ_CONTROL_1_REG::PRIMALLOCTHRESHOLD */
+         uint32_t prim_alloc_threshold;
 
          struct {
             uint32_t PC_POWER_CNTL;
index 11a3fba35029466b991ed88aea60deeea2368bcb..39ce9215c49055893eb3507316aeed0bfe25c992 100644 (file)
@@ -36,6 +36,12 @@ class CHIP(Enum):
     A6XX = 6
     A7XX = 7
 
+class CCUColorCacheFraction(Enum):
+    FULL = 0
+    HALF = 1
+    QUARTER = 2
+    EIGHTH = 3
+
 
 class State(object):
     def __init__(self):
@@ -143,6 +149,13 @@ class A6xxGPUInfo(GPUInfo):
         self.a6xx.has_cp_reg_write = True
         self.a6xx.has_8bpp_ubwc = True
 
+        self.a6xx.has_gmem_fast_clear = True
+
+        self.a6xx.sysmem_per_ccu_cache_size = 64 * 1024
+        self.a6xx.gmem_ccu_color_cache_fraction = CCUColorCacheFraction.QUARTER.value
+
+        self.a6xx.prim_alloc_threshold = 0x7
+
         for name, val in template.items():
             if name == "magic": # handled above
                 continue
@@ -217,7 +230,7 @@ a6xx_gen1 = dict(
         fibers_per_sp = 128 * 16,
         reg_size_vec4 = 96,
         instr_cache_size = 64,
-        concurrent_resolve = True,
+        concurrent_resolve = False,
         indirect_draw_wfm_quirk = True,
         depth_bounds_require_depth_test_quirk = True,
     )
index 22b734263c4725624e4d65cb40a3c8b8aa42c5d7..e65528da4bd445651aab61b0776c8c42ec190e44 100644 (file)
@@ -2638,18 +2638,25 @@ to upconvert to 32b float internally?
        <!-- 0x8e00-0x8e03 invalid -->
        <reg32 offset="0x8e04" name="RB_DBG_ECO_CNTL" usage="cmd"/> <!-- TODO: valid mask 0xfffffeff -->
        <reg32 offset="0x8e05" name="RB_ADDR_MODE_CNTL" pos="0" type="a5xx_address_mode"/>
-
        <!-- 0x02080000 in GMEM, zero otherwise?  -->
        <reg32 offset="0x8e06" name="RB_UNKNOWN_8E06" variants="A7XX-" usage="cmd"/>
+       <enum name="a6xx_ccu_color_cache_size">
+               <value value="0x0" name="CCU_COLOR_CACHE_SIZE_FULL"/>
+               <value value="0x1" name="CCU_COLOR_CACHE_SIZE_HALF"/>
+               <value value="0x2" name="CCU_COLOR_CACHE_SIZE_QUARTER"/>
+               <value value="0x3" name="CCU_COLOR_CACHE_SIZE_EIGHTH"/>
+       </enum>
 
        <reg32 offset="0x8e07" name="RB_CCU_CNTL" usage="cmd">
+               <bitfield name="GMEM_FAST_CLEAR_DISABLE" pos="0" type="boolean"/>
                <!-- concurrent resolves are apparently a 2-bit enum on a650+ -->
                <bitfield name="CONCURRENT_RESOLVE" pos="2" type="boolean"/>
                <bitfield name="DEPTH_OFFSET_HI" pos="7" type="hex"/>
                <bitfield name="COLOR_OFFSET_HI" pos="9" type="hex"/>
+               <bitfield name="DEPTH_CACHE_SIZE" low="10" high="11" type="uint"/>
                <!-- GMEM offset of CCU depth cache -->
                <bitfield name="DEPTH_OFFSET" low="12" high="20" shr="12" type="hex"/>
-               <bitfield name="GMEM" pos="22" type="boolean"/> <!-- set for GMEM path -->
+               <bitfield name="COLOR_CACHE_SIZE" low="21" high="22" type="a6xx_ccu_color_cache_size"/>
                <!-- GMEM offset of CCU color cache
                        for GMEM rendering, we set it to GMEM size minus the minimum
                        CCU color cache size. CCU color cache will be needed in some
@@ -4018,7 +4025,10 @@ to upconvert to 32b float internally?
        <reg32 offset="0xb980" type="a6xx_hlsq_fs_cntl_0" name="HLSQ_FS_CNTL_0" variants="A6XX" usage="rp_blit"/>
        <reg32 offset="0xb981" name="HLSQ_UNKNOWN_B981" pos="0" type="boolean" variants="A6XX"/> <!-- never used by blob -->
        <reg32 offset="0xb982" name="HLSQ_CONTROL_1_REG" low="0" high="2" variants="A6XX" usage="rp_blit">
-               <!-- TODO: have test cases with either 0x3 or 0x7 -->
+               <!-- Sets the maximum number of primitives allowed in one FS wave, minus one. This is
+                                similar to the A3xx field, except that it need not be set to anything but the
+                                maximum, since the hardware simply emits smaller waves when it runs out of space. -->
+               <bitfield name="PRIMALLOCTHRESHOLD" low="0" high="2" type="uint"/>
        </reg32>
        <reg32 offset="0xb983" name="HLSQ_CONTROL_2_REG" variants="A6XX" usage="rp_blit">
                <bitfield name="FACEREGID" low="0" high="7" type="a3xx_regid"/>
@@ -4031,13 +4041,11 @@ to upconvert to 32b float internally?
        <reg32 offset="0xb985" type="a6xx_hlsq_control_4_reg" name="HLSQ_CONTROL_4_REG" variants="A6XX" usage="rp_blit"/>
        <reg32 offset="0xb986" type="a6xx_hlsq_control_5_reg" name="HLSQ_CONTROL_5_REG" variants="A6XX" usage="rp_blit"/>
        <reg32 offset="0xb987" name="HLSQ_CS_CNTL" type="a6xx_hlsq_xs_cntl" variants="A6XX" usage="cmd"/>
-
        <!-- Either 0 or 0x401, the non-zero value is only in a few of dEQP-VK.ssbo.phys.layout.3_level_*.*8vec4 -->
        <reg32 offset="0xa9c5" name="HLSQ_UNKNOWN_A9C5" variants="A7XX-" usage="cmd"/>
-
        <reg32 offset="0xa9c6" type="a6xx_hlsq_fs_cntl_0" name="HLSQ_FS_CNTL_0" variants="A7XX-" usage="rp_blit"/>
        <reg32 offset="0xa9c7" name="HLSQ_CONTROL_1_REG" low="0" high="2" variants="A7XX-" usage="rp_blit">
-               <!-- TODO: have test cases with either 0x3 or 0x7 -->
+                       <bitfield name="PRIMALLOCTHRESHOLD" low="0" high="2" type="uint"/>
        </reg32>
        <reg32 offset="0xa9c8" name="HLSQ_CONTROL_2_REG" variants="A7XX-" usage="rp_blit">
                <bitfield name="FACEREGID" low="0" high="7" type="a3xx_regid"/>
index 015ee38bbe038776a04663d30747ca46b7b69a39..cb2a1263de5369f69e66e976a89e1dbc3208f128 100644 (file)
@@ -200,15 +200,22 @@ tu_emit_cache_flush_renderpass(struct tu_cmd_buffer *cmd_buffer)
 }
 
 static struct fd_reg_pair
-rb_ccu_cntl(uint32_t color_offset, bool gmem)
+rb_ccu_cntl(struct tu_device *dev, uint32_t color_offset)
 {
    uint32_t color_offset_hi = color_offset >> 21;
    color_offset &= 0x1fffff;
-   return A6XX_RB_CCU_CNTL(
-         .color_offset_hi = color_offset_hi,
-         .gmem = gmem,
-         .color_offset = color_offset,
-   );
+   enum a6xx_ccu_color_cache_size cache_size =
+      (a6xx_ccu_color_cache_size)(dev->physical_device->info->a6xx.gmem_ccu_color_cache_fraction);
+   bool concurrent_resolve = dev->physical_device->info->a6xx.concurrent_resolve;
+   return  A6XX_RB_CCU_CNTL(.gmem_fast_clear_disable =
+         !dev->physical_device->info->a6xx.has_gmem_fast_clear,
+      .concurrent_resolve = concurrent_resolve,
+      .depth_offset_hi = 0,
+      .color_offset_hi = color_offset_hi,
+      .depth_cache_size = 0,
+      .depth_offset = 0,
+      .color_cache_size = cache_size,
+      .color_offset = color_offset);
 }
 
 /* Cache flushes for things that use the color/depth read/write path (i.e.
@@ -252,12 +259,13 @@ tu_emit_cache_flush_ccu(struct tu_cmd_buffer *cmd_buffer,
    tu6_emit_flushes(cmd_buffer, cs, &cmd_buffer->state.cache);
 
    if (ccu_state != cmd_buffer->state.ccu_state) {
-      struct tu_physical_device *phys_dev = cmd_buffer->device->physical_device;
+      struct tu_physical_device *phys_dev =
+         cmd_buffer->device->physical_device;
       tu_cs_emit_regs(cs,
-                      rb_ccu_cntl(ccu_state == TU_CMD_CCU_GMEM ?
-                                  phys_dev->ccu_offset_gmem :
-                                  phys_dev->ccu_offset_bypass,
-                                  ccu_state == TU_CMD_CCU_GMEM));
+         rb_ccu_cntl(cmd_buffer->device,
+         ccu_state == TU_CMD_CCU_GMEM ?
+            phys_dev->ccu_offset_gmem :
+            phys_dev->ccu_offset_bypass));
       cmd_buffer->state.ccu_state = ccu_state;
    }
 }
@@ -1045,7 +1053,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
    cmd->state.cache.pending_flush_bits &=
       ~(TU_CMD_FLAG_WAIT_FOR_IDLE | TU_CMD_FLAG_CACHE_INVALIDATE);
 
-   tu_cs_emit_regs(cs, rb_ccu_cntl(phys_dev->ccu_offset_bypass, false));
+   tu_cs_emit_regs(cs, rb_ccu_cntl(dev, phys_dev->ccu_offset_bypass));
    cmd->state.ccu_state = TU_CMD_CCU_SYSMEM;
    tu_cs_emit_write_reg(cs, REG_A6XX_RB_DBG_ECO_CNTL,
                         phys_dev->info->a6xx.magic.RB_DBG_ECO_CNTL);
index 96f434baf84a0e0a935cb6f863e7bb0295a98bdc..dad9b9740e71694a0df1ea651a62aa63b2626ef7 100644 (file)
@@ -612,12 +612,19 @@ tu_physical_device_init(struct tu_physical_device *device,
       goto fail_free_name;
    }
    switch (fd_dev_gen(&device->dev_id)) {
-   case 6:
+   case 6: {
       device->info = info;
-      device->ccu_offset_bypass = device->info->num_ccu * A6XX_CCU_DEPTH_SIZE;
-      device->ccu_offset_gmem = (device->gmem_size -
-         device->info->num_ccu * A6XX_CCU_GMEM_COLOR_SIZE);
+      uint32_t depth_cache_size =
+         device->info->num_ccu * device->info->a6xx.sysmem_per_ccu_cache_size;
+      uint32_t color_cache_size =
+         (device->info->num_ccu *
+          device->info->a6xx.sysmem_per_ccu_cache_size) /
+         (1 << device->info->a6xx.gmem_ccu_color_cache_fraction);
+
+      device->ccu_offset_bypass = depth_cache_size;
+      device->ccu_offset_gmem = device->gmem_size - color_cache_size;
       break;
+   }
    default:
       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                  "device %s is unsupported", device->name);
index 3612f7a9fd63afcb35fe04e751a4b8b9fc0a8090..cf065b684a923decadb6339a61661322a34bd540 100644 (file)
@@ -1489,7 +1489,8 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
    }
 
    tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CONTROL_1_REG, 5);
-   tu_cs_emit(cs, 0x7);
+   tu_cs_emit(cs, A6XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD(
+      cs->device->physical_device->info->a6xx.prim_alloc_threshold));
    tu_cs_emit(cs, A6XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) |
                   A6XX_HLSQ_CONTROL_2_REG_SAMPLEID(samp_id_regid) |
                   A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(smask_in_regid) |
index c91e4137e7353f3e1ea457a7202e295c98898456..7df72dabf1e8b7773a22a3dbeebed5911c7a4afd 100644 (file)
@@ -746,16 +746,23 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 void
 fd6_emit_ccu_cntl(struct fd_ringbuffer *ring, struct fd_screen *screen, bool gmem)
 {
+   enum a6xx_ccu_color_cache_size cache_size = (a6xx_ccu_color_cache_size)(screen->info->a6xx.gmem_ccu_color_cache_fraction);
    uint32_t offset = gmem ? screen->ccu_offset_gmem : screen->ccu_offset_bypass;
    uint32_t offset_hi = offset >> 21;
    offset &= 0x1fffff;
 
-   OUT_REG(ring, A6XX_RB_CCU_CNTL(
-         .concurrent_resolve = gmem && screen->info->a6xx.concurrent_resolve,
-         .color_offset_hi = offset_hi,
-         .gmem = gmem,
-         .color_offset = offset,
-   ));
+   OUT_REG(ring,
+           A6XX_RB_CCU_CNTL(.gmem_fast_clear_disable =
+                               !screen->info->a6xx.has_gmem_fast_clear,
+                            .concurrent_resolve =
+                               screen->info->a6xx.concurrent_resolve,
+                            .depth_offset_hi = 0,
+                            .color_offset_hi = offset_hi,
+                            .depth_cache_size = 0,
+                            .depth_offset = 0,
+                            .color_cache_size = cache_size,
+                            .color_offset = offset,
+                            ));
 }
 
 template void fd6_emit_cs_state<A6XX>(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd6_compute_state *cs);
index 7ba49ac9d78688c9baae800a213fd5332fc55a0b..818e4593331a553d81d75e53cb3ad250eb4efebe 100644 (file)
@@ -421,8 +421,8 @@ tex_opc_to_prefetch_cmd(opc_t tex_opc)
 
 template <chip CHIP>
 static void
-setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
-               struct fd6_program_state *state,
+setup_stateobj(struct fd_screen *screen, struct fd_ringbuffer *ring,
+               struct fd_context *ctx, struct fd6_program_state *state,
                const struct ir3_cache_key *cache_key,
                bool binning_pass) assert_dt
 {
@@ -890,7 +890,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
                      A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
 
    OUT_REG(ring,
-           HLSQ_CONTROL_1_REG(CHIP, 0x7), /* XXX */
+           HLSQ_CONTROL_1_REG(CHIP,
+            screen->info->a6xx.prim_alloc_threshold),
            HLSQ_CONTROL_2_REG(
                  CHIP,
                  .faceregid = face_regid,
@@ -1375,8 +1376,8 @@ fd6_program_create(void *data, struct ir3_shader_variant *bs,
    }
 
    setup_config_stateobj<CHIP>(ctx, state);
-   setup_stateobj<CHIP>(state->binning_stateobj, ctx, state, key, true);
-   setup_stateobj<CHIP>(state->stateobj, ctx, state, key, false);
+   setup_stateobj<CHIP>(screen, state->binning_stateobj, ctx, state, key, true);
+   setup_stateobj<CHIP>(screen, state->stateobj, ctx, state, key, false);
    state->interp_stateobj = create_interp_stateobj(ctx, state);
 
    const struct ir3_stream_output_info *stream_output =
index 881aba30a7433b59ab08644d764b630e2e6a1801..8eb143ce8179b00c1ea2f81d7cf0dd9118afb900 100644 (file)
@@ -157,9 +157,14 @@ fd6_screen_init(struct pipe_screen *pscreen)
 
    screen->max_rts = A6XX_MAX_RENDER_TARGETS;
 
-   screen->ccu_offset_bypass = screen->info->num_ccu * A6XX_CCU_DEPTH_SIZE;
-   screen->ccu_offset_gmem = (screen->gmemsize_bytes -
-         screen->info->num_ccu * A6XX_CCU_GMEM_COLOR_SIZE);
+   uint32_t depth_cache_size =
+      screen->info->num_ccu * screen->info->a6xx.sysmem_per_ccu_cache_size;
+   uint32_t color_cache_size =
+      (screen->info->num_ccu * screen->info->a6xx.sysmem_per_ccu_cache_size) /
+      (1 << screen->info->a6xx.gmem_ccu_color_cache_fraction);
+
+   screen->ccu_offset_bypass = depth_cache_size;
+   screen->ccu_offset_gmem = screen->gmemsize_bytes - color_cache_size;
 
    /* Currently only FB_READ forces GMEM path, mostly because we'd have to
     * deal with cmdstream patching otherwise..