freedreno: Specify GMEM tile alignment per GPU
author: Rob Clark <robdclark@chromium.org>
Sun, 22 Jan 2023 18:57:30 +0000 (10:57 -0800)
committer: Marge Bot <emma+marge@anholt.net>
Sat, 18 Mar 2023 18:21:53 +0000 (18:21 +0000)
The GMEM tile alignments differ between GPUs, presumably based on the number of CCUs/SPs and the DDR bus topology.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21573>

src/freedreno/common/freedreno_devices.py
src/gallium/drivers/freedreno/freedreno_gmem.c

index e9fd3b5..4dd66c6 100644 (file)
@@ -108,19 +108,17 @@ class A6xxGPUInfo(GPUInfo):
        into distinct sub-generations.  The template parameter avoids
        duplication of parameters that are unique to the sub-generation.
     """
-    def __init__(self, template, num_sp_cores, num_ccu, magic_regs):
+    def __init__(self, template, num_ccu, tile_align_w, tile_align_h, magic_regs):
         super().__init__(gmem_align_w = 16, gmem_align_h = 4,
-                         tile_align_w = 32, tile_align_h = 32,
+                         tile_align_w = tile_align_w,
+                         tile_align_h = tile_align_h,
                          tile_max_w   = 1024, # max_bitfield_val(5, 0, 5)
                          tile_max_h   = max_bitfield_val(14, 8, 4),
                          num_vsc_pipes = 32)
-        assert(num_sp_cores == num_ccu)
 
-        self.num_sp_cores = num_sp_cores
-
-        # 96 tile alignment seems correlated to 3 CCU
-        if num_ccu == 3:
-            self.tile_align_w = 96
+        # The # of SP cores seems to always match # of CCU
+        self.num_sp_cores = num_ccu
+        self.num_ccu = num_ccu
 
         self.a6xx = Struct()
         self.a6xx.magic = Struct()
@@ -273,8 +271,9 @@ add_gpus([
         GPUId(619),
     ], A6xxGPUInfo(
         a6xx_gen1,
-        num_sp_cores = 1,
         num_ccu = 1,
+        tile_align_w = 32,
+        tile_align_h = 16,
         magic_regs = dict(
             PC_POWER_CNTL = 0,
             TPL1_DBG_ECO_CNTL = 0x00108000,
@@ -296,8 +295,9 @@ add_gpus([
         GPUId(620),
     ], A6xxGPUInfo(
         a6xx_gen1,
-        num_sp_cores = 1,
         num_ccu = 1,
+        tile_align_w = 32,
+        tile_align_h = 16,
         magic_regs = dict(
             PC_POWER_CNTL = 0,
             TPL1_DBG_ECO_CNTL = 0x01008000,
@@ -319,8 +319,9 @@ add_gpus([
         GPUId(630),
     ], A6xxGPUInfo(
         a6xx_gen1,
-        num_sp_cores = 2,
         num_ccu = 2,
+        tile_align_w = 32,
+        tile_align_h = 16,
         magic_regs = dict(
             PC_POWER_CNTL = 1,
             TPL1_DBG_ECO_CNTL = 0x00108000,
@@ -342,8 +343,9 @@ add_gpus([
         GPUId(640),
     ], A6xxGPUInfo(
         a6xx_gen2,
-        num_sp_cores = 2,
         num_ccu = 2,
+        tile_align_w = 32,
+        tile_align_h = 16,
         magic_regs = dict(
             PC_POWER_CNTL = 1,
             TPL1_DBG_ECO_CNTL = 0x00008000,
@@ -365,8 +367,9 @@ add_gpus([
         GPUId(680),
     ], A6xxGPUInfo(
         a6xx_gen2,
-        num_sp_cores = 4,
         num_ccu = 4,
+        tile_align_w = 64,
+        tile_align_h = 32,
         magic_regs = dict(
             PC_POWER_CNTL = 3,
             TPL1_DBG_ECO_CNTL = 0x00108000,
@@ -388,8 +391,9 @@ add_gpus([
         GPUId(650),
     ], A6xxGPUInfo(
         a6xx_gen3,
-        num_sp_cores = 3,
         num_ccu = 3,
+        tile_align_w = 96,
+        tile_align_h = 48,
         magic_regs = dict(
             PC_POWER_CNTL = 2,
             # this seems to be a chicken bit that fixes cubic filtering:
@@ -416,8 +420,9 @@ add_gpus([
         GPUId(chip_id=0xffff06030500, name="Adreno 7c+ Gen 3"),
     ], A6xxGPUInfo(
         a6xx_gen4,
-        num_sp_cores = 2,
         num_ccu = 2,
+        tile_align_w = 32,
+        tile_align_h = 16,
         magic_regs = dict(
             PC_POWER_CNTL = 1,
             TPL1_DBG_ECO_CNTL = 0x05008000,
@@ -439,8 +444,9 @@ add_gpus([
         GPUId(660),
     ], A6xxGPUInfo(
         a6xx_gen4,
-        num_sp_cores = 3,
         num_ccu = 3,
+        tile_align_w = 96,
+        tile_align_h = 16,
         magic_regs = dict(
             PC_POWER_CNTL = 2,
             TPL1_DBG_ECO_CNTL = 0x05008000,
index f0f7dc5..20b1698 100644 (file)
@@ -549,7 +549,7 @@ gmem_key_init(struct fd_batch *batch, bool assume_zs, bool no_scis_opt)
        */
       key->gmem_page_align = 8;
    } else if (is_a6xx(screen)) {
-      key->gmem_page_align = (screen->info->tile_align_w == 96) ? 3 : 1;
+      key->gmem_page_align = screen->info->num_ccu;
    } else {
       // TODO re-check this across gens.. maybe it should only
       // be a single page in some cases: