anv: avoid MI commands to copy draw indirect count
author Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Thu, 14 Sep 2023 16:10:10 +0000 (19:10 +0300)
committer Marge Bot <emma+marge@anholt.net>
Fri, 20 Oct 2023 13:07:53 +0000 (13:07 +0000)
We can just make the address of the count available to the generation
shader.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Tested-by: Felix DeGrood <felix.j.degrood@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25361>

src/intel/vulkan/anv_internal_kernels.c
src/intel/vulkan/anv_internal_kernels.h
src/intel/vulkan/genX_cmd_draw_generated_indirect.h
src/intel/vulkan/shaders/common_generated_draws.glsl
src/intel/vulkan/shaders/gfx11_generated_draws.glsl
src/intel/vulkan/shaders/gfx9_generated_draws.glsl

index dcbb21c..af36228 100644 (file)
@@ -355,10 +355,10 @@ anv_device_init_internal_kernels(struct anv_device *device)
                        ARRAY_SIZE(gfx11_generated_draws_spv_source) :
                        ARRAY_SIZE(gfx9_generated_draws_spv_source),
          .send_count = device->info->ver >= 11 ?
-                       11 /* 2 * (2 loads + 3 stores) + 1 store */ :
-                       17 /* 2 * (2 loads + 6 stores) + 1 store */,
+                       12 /* 2 * (2 loads + 3 stores) + 1 load + 1 store */ :
+                       18 /* 2 * (2 loads + 6 stores) + 1 load + 1 store */,
          .bind_map   = {
-            .num_bindings = 4,
+            .num_bindings = 5,
             .bindings     = {
                {
                   .address_offset = offsetof(struct anv_generated_indirect_params,
@@ -373,6 +373,10 @@ anv_device_init_internal_kernels(struct anv_device *device)
                                              draw_ids_addr),
                },
                {
+                  .address_offset = offsetof(struct anv_generated_indirect_params,
+                                             draw_count_addr),
+               },
+               {
                   .push_constant = true,
                },
             },
index f93c827..c85b299 100644 (file)
@@ -33,7 +33,7 @@
 
 /* This needs to match common_generated_draws.glsl :
  *
- *    layout(set = 0, binding = 3) uniform block
+ *    layout(set = 0, binding = 4) uniform block
  */
 struct anv_generated_indirect_draw_params {
    /* Draw ID buffer address (only used on Gfx9) */
@@ -47,18 +47,12 @@ struct anv_generated_indirect_draw_params {
     * gl_FragCoord
     */
    uint32_t draw_base;
-
-   /* Number of draws to generate */
-   uint32_t draw_count;
-
    /* Maximum number of draws (equals to draw_count for indirect draws without
     * an indirect count)
     */
    uint32_t max_draw_count;
-
    /* Instance multiplier for multi view */
    uint32_t instance_multiplier;
-
    /* Address where to jump at after the generated draw (only used with
     * indirect draw count variants)
     */
@@ -68,6 +62,9 @@ struct anv_generated_indirect_draw_params {
 struct anv_generated_indirect_params {
    struct anv_generated_indirect_draw_params draw;
 
+   /* Draw count value for non count variants of draw indirect commands */
+   uint32_t draw_count;
+
    /* Global address of binding 0 */
    uint64_t indirect_data_addr;
 
@@ -77,6 +74,9 @@ struct anv_generated_indirect_params {
    /* Global address of binding 2 */
    uint64_t draw_ids_addr;
 
+   /* Global address of binding 3 (points to the draw_count field above) */
+   uint64_t draw_count_addr;
+
    /* CPU side pointer to the previous item when number of draws has to be
     * split into smaller chunks, see while loop in
     * genX(cmd_buffer_emit_indirect_generated_draws)
index 51b59ac..bdb70fa 100644 (file)
@@ -53,7 +53,6 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
                                      bool indexed)
 {
    struct anv_device *device = cmd_buffer->device;
-   struct anv_batch *batch = &cmd_buffer->generation_batch;
 
    struct anv_state push_data_state =
       genX(simple_shader_alloc_push)(&cmd_buffer->generation_shader_state,
@@ -62,6 +61,16 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
    struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
    const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
 
+   struct anv_address draw_count_addr;
+   if (anv_address_is_null(count_addr)) {
+      draw_count_addr = anv_address_add(
+         genX(simple_shader_push_state_address)(
+            &cmd_buffer->generation_shader_state, push_data_state),
+         offsetof(struct anv_generated_indirect_params, draw_count));
+   } else {
+      draw_count_addr = count_addr;
+   }
+
    struct anv_generated_indirect_params *push_data = push_data_state.map;
    *push_data = (struct anv_generated_indirect_params) {
       .draw                      = {
@@ -79,39 +88,16 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
                                              ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
                                    ((generated_cmd_stride / 4) << 16),
          .draw_base              = item_base,
-         /* If count_addr is not NULL, we'll edit it through a the command
-          * streamer.
-          */
-         .draw_count             = anv_address_is_null(count_addr) ? max_count : 0,
          .max_draw_count         = max_count,
          .instance_multiplier    = pipeline->instance_multiplier,
       },
+      .draw_count                = anv_address_is_null(count_addr) ? max_count : 0,
       .indirect_data_addr        = anv_address_physical(indirect_data_addr),
       .generated_cmds_addr       = anv_address_physical(generated_cmds_addr),
       .draw_ids_addr             = anv_address_physical(draw_id_addr),
+      .draw_count_addr           = anv_address_physical(draw_count_addr),
    };
 
-   if (!anv_address_is_null(count_addr)) {
-      /* Copy the draw count into the push constants so that the generation
-       * gets the value straight away and doesn't even need to access memory.
-       */
-      struct mi_builder b;
-      mi_builder_init(&b, device->info, batch);
-      mi_memcpy(&b,
-                anv_address_add(
-                   genX(simple_shader_push_state_address)(
-                      &cmd_buffer->generation_shader_state,
-                      push_data_state),
-                   offsetof(struct anv_generated_indirect_params, draw.draw_count)),
-                count_addr, 4);
-
-      /* Make sure the memcpy landed for the generating draw call to pick up
-       * the value.
-       */
-      genx_batch_emit_pipe_control(batch, cmd_buffer->device->info,
-                                   ANV_PIPE_CS_STALL_BIT);
-   }
-
    genX(emit_simple_shader_dispatch)(&cmd_buffer->generation_shader_state,
                                      item_count, push_data_state);
 
index 09a8498..9cdd7c2 100644 (file)
@@ -41,14 +41,26 @@ layout(set = 0, binding = 2, std430) buffer Storage2 {
    uint draw_ids[];
 };
 
+/* We're not using a uniform block for this because our compiler
+ * infrastructure relies on UBOs to be 32-bytes aligned so that we can push
+ * them into registers. This value can come directly from the indirect buffer
+ * given to indirect draw commands and the requirement there is 4-bytes
+ * alignment.
+ *
+ * Also use a prefix to the variable to remember to make a copy of it, avoid
+ * unnecessary accesses.
+ */
+layout(set = 0, binding = 3) buffer Storage3 {
+   uint _draw_count;
+};
+
 /* This data will be provided through push constants. */
-layout(set = 0, binding = 3) uniform block {
+layout(set = 0, binding = 4) uniform block {
    uint64_t draw_id_addr;
    uint64_t indirect_data_addr;
    uint indirect_data_stride;
    uint flags;
    uint draw_base;
-   uint draw_count;
    uint max_draw_count;
    uint instance_multiplier;
    uint64_t end_addr;
index 355d383..4b8f2e5 100644 (file)
@@ -80,6 +80,7 @@ void main()
    uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x);
    uint cmd_idx = item_idx * _3dprim_dw_size;
    uint draw_id = draw_base + item_idx;
+   uint draw_count = _draw_count;
 
    if (draw_id < draw_count)
       write_draw(item_idx, cmd_idx, draw_id);
index a24f2c3..d7fbd7e 100644 (file)
@@ -138,6 +138,7 @@ void main()
    uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x);
    uint cmd_idx = item_idx * _3dprim_dw_size;
    uint draw_id = draw_base + item_idx;
+   uint draw_count = _draw_count;
 
    if (draw_id < draw_count)
       write_draw(item_idx, cmd_idx, draw_id);