From d2224544d68d7aedcbaf83fdec9ea7fc7603bf6a Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 1 Aug 2023 07:26:03 -0400 Subject: [PATCH] agx: Allow 64-bit memory regs The mask is based on the format, which can be at most 32 bits per channel. So if we have 64-bit loads/stores we're still using a 32-bit format with double the bits set in the mask. This will fix validation failures with spilling. No shader-db changes. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/compiler/agx_pack.c | 3 +-- src/asahi/compiler/agx_register_allocate.c | 8 ++++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/asahi/compiler/agx_pack.c b/src/asahi/compiler/agx_pack.c index e46f238..3471fbc 100644 --- a/src/asahi/compiler/agx_pack.c +++ b/src/asahi/compiler/agx_pack.c @@ -157,10 +157,9 @@ agx_pack_pbe_lod(agx_index index, bool *flag) static unsigned agx_pack_memory_reg(agx_index index, bool *flag) { - assert(index.size == AGX_SIZE_16 || index.size == AGX_SIZE_32); assert_register_is_aligned(index); - *flag = (index.size == AGX_SIZE_32); + *flag = (index.size >= AGX_SIZE_32); return index.value; } diff --git a/src/asahi/compiler/agx_register_allocate.c b/src/asahi/compiler/agx_register_allocate.c index 7b5c757..2a09876 100644 --- a/src/asahi/compiler/agx_register_allocate.c +++ b/src/asahi/compiler/agx_register_allocate.c @@ -56,7 +56,10 @@ agx_write_registers(const agx_instr *I, unsigned d) case AGX_OPCODE_DEVICE_LOAD: case AGX_OPCODE_LOCAL_LOAD: case AGX_OPCODE_LD_TILE: - return util_bitcount(I->mask) * size; + /* Can write 16-bit or 32-bit. Anything logically 64-bit is already + * expanded to 32-bit in the mask. 
+ */ + return util_bitcount(I->mask) * MIN2(size, 2); case AGX_OPCODE_LDCF: return 6; @@ -215,8 +218,9 @@ agx_read_registers(const agx_instr *I, unsigned s) case AGX_OPCODE_DEVICE_STORE: case AGX_OPCODE_LOCAL_STORE: case AGX_OPCODE_ST_TILE: + /* See agx_write_registers */ if (s == 0) - return util_bitcount(I->mask) * size; + return util_bitcount(I->mask) * MIN2(size, 2); else return size; -- 2.7.4