From c4874b4cee6925f3faa2d68e780c7ea0d16c7ba8 Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Tue, 8 Aug 2023 09:53:16 -0700 Subject: [PATCH] freedreno/a5xx: Set num_sp_cores and set PC/VFD_POWER_CNTL accordingly. Based on libwrap tracing of the blob. Part-of: --- src/freedreno/common/freedreno_devices.py | 30 ++++++++++++++++++++++++++- src/gallium/drivers/freedreno/a5xx/fd5_gmem.c | 10 +++++---- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/src/freedreno/common/freedreno_devices.py b/src/freedreno/common/freedreno_devices.py index 60ed826..c8d96c9 100644 --- a/src/freedreno/common/freedreno_devices.py +++ b/src/freedreno/common/freedreno_devices.py @@ -232,8 +232,36 @@ add_gpus([ GPUId(506), GPUId(508), GPUId(509), + ], GPUInfo( + CHIP.A5XX, + gmem_align_w = 64, gmem_align_h = 32, + tile_align_w = 64, tile_align_h = 32, + tile_max_w = 1024, # max_bitfield_val(7, 0, 5) + tile_max_h = max_bitfield_val(16, 9, 5), + num_vsc_pipes = 16, + cs_shared_mem_size = 32 * 1024, + num_sp_cores = 1, + wave_granularity = 2, + fibers_per_sp = 64 * 16, # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd. + )) + +add_gpus([ GPUId(510), GPUId(512), + ], GPUInfo( + CHIP.A5XX, + gmem_align_w = 64, gmem_align_h = 32, + tile_align_w = 64, tile_align_h = 32, + tile_max_w = 1024, # max_bitfield_val(7, 0, 5) + tile_max_h = max_bitfield_val(16, 9, 5), + num_vsc_pipes = 16, + cs_shared_mem_size = 32 * 1024, + num_sp_cores = 2, + wave_granularity = 2, + fibers_per_sp = 64 * 16, # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd. + )) + +add_gpus([ GPUId(530), GPUId(540), ], GPUInfo( @@ -244,7 +272,7 @@ add_gpus([ tile_max_h = max_bitfield_val(16, 9, 5), num_vsc_pipes = 16, cs_shared_mem_size = 32 * 1024, - num_sp_cores = 0, # TODO + num_sp_cores = 4, wave_granularity = 2, fibers_per_sp = 0, # TODO )) diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c b/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c index 9584705..5b8b728 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c @@ -394,6 +394,7 @@ emit_binning_pass(struct fd_batch *batch) assert_dt static void fd5_emit_tile_init(struct fd_batch *batch) assert_dt { + struct fd_context *ctx = batch->ctx; struct fd_ringbuffer *ring = batch->gmem; struct pipe_framebuffer_state *pfb = &batch->framebuffer; @@ -411,10 +412,10 @@ fd5_emit_tile_init(struct fd_batch *batch) assert_dt OUT_RING(ring, 0x0); OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1); - OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */ + OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* PC_POWER_CNTL */ OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1); - OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */ + OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* VFD_POWER_CNTL */ /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */ fd_wfi(batch, ring); @@ -715,6 +716,7 @@ fd5_emit_tile_fini(struct fd_batch *batch) assert_dt static void fd5_emit_sysmem_prep(struct fd_batch *batch) assert_dt { + struct fd_context *ctx = batch->ctx; struct fd_ringbuffer *ring = batch->gmem; fd5_emit_restore(batch, ring); @@ -730,10 +732,10 @@ fd5_emit_sysmem_prep(struct fd_batch *batch) assert_dt fd5_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false); OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1); - OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */ + OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* PC_POWER_CNTL */ OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1); - OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */ + OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* VFD_POWER_CNTL */ /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */ fd_wfi(batch, ring); -- 2.7.4