gallium: Pack the atomic counters just above the SSBOs.
author Eric Anholt <eric@anholt.net>
Fri, 20 Dec 2019 21:30:04 +0000 (13:30 -0800)
committer Eric Anholt <eric@anholt.net>
Tue, 21 Jan 2020 18:06:23 +0000 (10:06 -0800)
We carve out half the SSBO space for atomics, and we were just binding
them way up there, at MaxShaderStorageBlocks plus the atomic binding.
freedreno was then using a remapping table to map the sparse buffer index
back down, since space in the descriptor array is a shared resource that
may limit parallelism.  That remapping table, generated inside of the ir3
compiler, is getting thoroughly in the way of implementing Vulkan
descriptor sets.

We will be able to get rid of freedreno's remapping table, and
hopefully save shared resources on other hardware, by packing the atomics
tightly above the SSBOs (like i965 does).  We already rebind the shader
buffers on program change if either the old or new program has SSBOs or
ABOs, so this doesn't necessarily increase the program state change cost
(the only cost increase I can come up with is if you're using the same
atomic counter without rebinding it across changes of programs with
varying SSBO counts, meaning it would now bounce around index space).

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3240>

src/mesa/state_tracker/st_atom_atomicbuf.c
src/mesa/state_tracker/st_atom_list.h
src/mesa/state_tracker/st_atom_storagebuf.c
src/mesa/state_tracker/st_context.h
src/mesa/state_tracker/st_glsl_to_nir.cpp
src/mesa/state_tracker/st_glsl_to_tgsi.cpp

index dad2b65..1855c77 100644 (file)
@@ -75,10 +75,10 @@ st_bind_atomics(struct st_context *st, struct gl_program *prog,
       return;
 
    /* For !has_hw_atomics, the atomic counters have been rewritten to be above
-    * the SSBO range.
+    * the SSBOs used by the program.
     */
-   unsigned buffer_base = st->ctx->Const.Program[stage].MaxShaderStorageBlocks;
-
+   unsigned buffer_base = prog->info.num_ssbos;
+   unsigned used_bindings = 0;
    for (i = 0; i < prog->sh.data->NumAtomicBuffers; i++) {
       struct gl_active_atomic_buffer *atomic =
          &prog->sh.data->AtomicBuffers[i];
@@ -88,7 +88,9 @@ st_bind_atomics(struct st_context *st, struct gl_program *prog,
 
       st->pipe->set_shader_buffers(st->pipe, shader_type,
                                    buffer_base + atomic->Binding, 1, &sb, 0x1);
+      used_bindings = MAX2(atomic->Binding + 1, used_bindings);
    }
+   st->last_used_atomic_bindings[shader_type] = used_bindings;
 }
 
 void
index e1aebc9..a4d5583 100644 (file)
@@ -57,6 +57,9 @@ ST_STATE(ST_NEW_TES_ATOMICS, st_bind_tes_atomics)
 ST_STATE(ST_NEW_FS_ATOMICS, st_bind_fs_atomics)
 ST_STATE(ST_NEW_GS_ATOMICS, st_bind_gs_atomics)
 
+/* SSBOs depend on the _atomics having been updated first in the
+ * !has_hw_atomics case.
+ */
 ST_STATE(ST_NEW_VS_SSBOS, st_bind_vs_ssbos)
 ST_STATE(ST_NEW_TCS_SSBOS, st_bind_tcs_ssbos)
 ST_STATE(ST_NEW_TES_SSBOS, st_bind_tes_ssbos)
index 5ffafaa..0355c98 100644 (file)
@@ -46,12 +46,9 @@ st_bind_ssbos(struct st_context *st, struct gl_program *prog,
 {
    unsigned i;
    struct pipe_shader_buffer buffers[MAX_SHADER_STORAGE_BUFFERS];
-   struct gl_program_constants *c;
    if (!prog || !st->pipe->set_shader_buffers)
       return;
 
-   c = &st->ctx->Const.Program[prog->info.stage];
-
    for (i = 0; i < prog->info.num_ssbos; i++) {
       struct gl_buffer_binding *binding;
       struct st_buffer_object *st_obj;
@@ -81,13 +78,19 @@ st_bind_ssbos(struct st_context *st, struct gl_program *prog,
    st->pipe->set_shader_buffers(st->pipe, shader_type, 0,
                                 prog->info.num_ssbos, buffers,
                                 prog->sh.ShaderStorageBlocksWriteAccess);
-   /* clear out any stale shader buffers */
-   if (prog->info.num_ssbos < c->MaxShaderStorageBlocks)
+
+   /* Clear out any stale shader buffers (or lowered atomic counters). */
+   int num_ssbos = prog->info.num_ssbos;
+   if (!st->has_hw_atomics)
+      num_ssbos += st->last_used_atomic_bindings[shader_type];
+   if (st->last_num_ssbos[shader_type] > num_ssbos) {
       st->pipe->set_shader_buffers(
             st->pipe, shader_type,
-            prog->info.num_ssbos,
-            c->MaxShaderStorageBlocks - prog->info.num_ssbos,
+            num_ssbos,
+            st->last_num_ssbos[shader_type] - num_ssbos,
             NULL, 0);
+      st->last_num_ssbos[shader_type] = num_ssbos;
+   }
 }
 
 void st_bind_vs_ssbos(struct st_context *st)
index 9a6a1f8..6870881 100644 (file)
@@ -337,6 +337,9 @@ struct st_context
    /* The number of vertex buffers from the last call of validate_arrays. */
    unsigned last_num_vbuffers;
 
+   unsigned last_used_atomic_bindings[PIPE_SHADER_TYPES];
+   unsigned last_num_ssbos[PIPE_SHADER_TYPES];
+
    int32_t draw_stamp;
    int32_t read_stamp;
 
index d19398b..fadd1b4 100644 (file)
@@ -504,8 +504,7 @@ st_glsl_to_nir_post_opts(struct st_context *st, struct gl_program *prog,
    nir_remove_dead_variables(nir, mask);
 
    if (!st->has_hw_atomics)
-      NIR_PASS_V(nir, nir_lower_atomics_to_ssbo,
-                 st->ctx->Const.Program[nir->info.stage].MaxShaderStorageBlocks);
+      NIR_PASS_V(nir, nir_lower_atomics_to_ssbo, prog->info.num_ssbos);
 
    st_finalize_nir_before_variants(nir);
 
index aec59e7..c9f8d13 100644 (file)
@@ -3448,7 +3448,7 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
       resource = buffer;
    } else {
       st_src_reg buffer(PROGRAM_BUFFER,
-                        ctx->Const.Program[shader->Stage].MaxShaderStorageBlocks +
+                        prog->info.num_ssbos +
                         location->data.binding,
                         GLSL_TYPE_ATOMIC_UINT);
 
@@ -7051,7 +7051,7 @@ st_translate_program(
 
       if (!st_context(ctx)->has_hw_atomics) {
          for (i = 0; i < prog->info.num_abos; i++) {
-            unsigned index = (frag_const->MaxShaderStorageBlocks +
+            unsigned index = (prog->info.num_ssbos +
                               prog->sh.AtomicBuffers[i]->Binding);
             assert(prog->sh.AtomicBuffers[i]->Binding <
                    frag_const->MaxAtomicBuffers);