From c94ff7dd811ad9739b4df92f1cb1ff068ddfdd25 Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Fri, 6 Aug 2021 15:39:02 -0700 Subject: [PATCH] freedreno/a5xx: Reduce packet emits for SSBO state. This is what I see happening in dEQP-VK.spirv_assembly.instruction.compute.opatomic_storage_buffer.load on pixel 2 (also where I found a buffer big enough to show how to encode the size). Part-of: --- src/gallium/drivers/freedreno/a5xx/fd5_emit.c | 37 +++++++++++++++------------ 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c index 85bb1b8..08e1173 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c @@ -429,31 +429,34 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring, { unsigned count = util_last_bit(so->enabled_mask); - for (unsigned i = 0; i < count; i++) { - OUT_PKT7(ring, CP_LOAD_STATE4, 5); - OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(i) | - CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | - CP_LOAD_STATE4_0_STATE_BLOCK(sb) | - CP_LOAD_STATE4_0_NUM_UNIT(1)); - OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) | - CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); - OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); + OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 2 * count); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(sb) | + CP_LOAD_STATE4_0_NUM_UNIT(count)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); + for (unsigned i = 0; i < count; i++) { struct pipe_shader_buffer *buf = &so->sb[i]; unsigned sz = buf->buffer_size; /* Unlike a6xx, SSBO size is in bytes. */ OUT_RING(ring, A5XX_SSBO_1_0_WIDTH(sz & MASK(16))); OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(sz >> 16)); + } - OUT_PKT7(ring, CP_LOAD_STATE4, 5); - OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(i) | - CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | - CP_LOAD_STATE4_0_STATE_BLOCK(sb) | - CP_LOAD_STATE4_0_NUM_UNIT(1)); - OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_UBO) | - CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); - OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); + OUT_PKT7(ring, CP_LOAD_STATE4, 3 + 2 * count); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(sb) | + CP_LOAD_STATE4_0_NUM_UNIT(count)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_UBO) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); + for (unsigned i = 0; i < count; i++) { + struct pipe_shader_buffer *buf = &so->sb[i]; if (buf->buffer) { struct fd_resource *rsc = fd_resource(buf->buffer); -- 2.7.4