freedreno: Add per-device parameters for private memory
author: Connor Abbott <cwabbott0@gmail.com>
Thu, 29 Oct 2020 14:08:36 +0000 (15:08 +0100)
committer: Connor Abbott <cwabbott0@gmail.com>
Thu, 19 Nov 2020 16:55:03 +0000 (17:55 +0100)
We have to allocate backing storage big enough to hold all the private
memory for all threads that can possibly be in flight, which means that
we have to start filling in some more model-specific information as the
sizes will be different for models with different core counts/ALU
counts.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7386>

src/freedreno/common/freedreno_dev_info.c
src/freedreno/common/freedreno_dev_info.h

index 7297411..5473be6 100644 (file)
@@ -47,6 +47,8 @@ freedreno_dev_info_init(struct freedreno_dev_info *info, uint32_t gpu_id)
                switch (gpu_id) {
                case 615:
                case 618:
+                       info->num_sp_cores = 1;
+                       info->fibers_per_sp = 128 * 16;
                        info->a6xx.ccu_offset_gmem = 0x7c000;
                        info->a6xx.ccu_offset_bypass = 0x10000;
                        info->a6xx.ccu_cntl_gmem_unk2 = true;
@@ -56,6 +58,8 @@ freedreno_dev_info_init(struct freedreno_dev_info *info, uint32_t gpu_id)
                        info->a6xx.magic.SP_UNKNOWN_A0F8 = 0;
                        break;
                case 630:
+                       info->num_sp_cores = 2;
+                       info->fibers_per_sp = 128 * 16;
                        info->a6xx.ccu_offset_gmem = 0xf8000;
                        info->a6xx.ccu_offset_bypass = 0x20000;
                        info->a6xx.ccu_cntl_gmem_unk2 = true;
@@ -65,6 +69,19 @@ freedreno_dev_info_init(struct freedreno_dev_info *info, uint32_t gpu_id)
                        info->a6xx.magic.SP_UNKNOWN_A0F8 = 1;
                        break;
                case 640:
+                       info->num_sp_cores = 2;
+                       /* The wavefront ID returned by the getwid instruction has a
+                        * maximum of 3 * 10 - 1, or so it seems. However the swizzled
+                        * index used in the mem offset calculation is
+                        * "(wid / 3) | ((wid % 3) << 4)", so that the actual max is
+                        * around 3 * 16. Furthermore, with the per-fiber layout, the HW
+                        * swizzles the wavefront index and fiber index itself, and it
+                        * pads the number of wavefronts to 4 * 16 to make the swizzling
+                        * simpler, so we have to bump the number of wavefronts to 4 * 16
+                        * for the per-fiber layout. We could theoretically reduce it for
+                        * the per-wave layout though.
+                        */
+                       info->fibers_per_sp = 128 * 4 * 16;
                        info->a6xx.ccu_offset_gmem = 0xf8000;
                        info->a6xx.ccu_offset_bypass = 0x20000;
                        info->a6xx.supports_multiview_mask = true;
@@ -73,6 +90,8 @@ freedreno_dev_info_init(struct freedreno_dev_info *info, uint32_t gpu_id)
                        info->a6xx.magic.SP_UNKNOWN_A0F8 = 1;
                        break;
                case 650:
+                       info->num_sp_cores = 3;
+                       info->fibers_per_sp = 128 * 2 * 16;
                        info->a6xx.ccu_offset_gmem = 0x114000;
                        info->a6xx.ccu_offset_bypass = 0x30000;
                        info->a6xx.supports_multiview_mask = true;
index c335191..924adf8 100644 (file)
@@ -42,6 +42,9 @@ struct freedreno_dev_info {
        
        uint32_t num_vsc_pipes;
 
+       /* Information for private memory calculations */
+       uint32_t num_sp_cores, fibers_per_sp;
+
        union {
                struct {
                        /* Whether the PC_MULTIVIEW_MASK register exists. */