anv: decouple util function from anv_cmd_buffer
authorLionel Landwerlin <lionel.g.landwerlin@intel.com>
Sat, 26 Feb 2022 12:00:07 +0000 (14:00 +0200)
committerMarge Bot <emma+marge@anholt.net>
Fri, 23 Dec 2022 22:52:50 +0000 (22:52 +0000)
The issue we're addressing here is that we have 2 batches and the both
grow at different rate. We want to keep doubling the main batch size
as the application writes more and more commands to limit the number
of GEM BOs. But we don't want to have the generation batch size to be
linked to the main batch.

v2: remove gfx7 code

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Rohan Garg <rohan.garg@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15642>

src/intel/vulkan/anv_batch_chain.c
src/intel/vulkan/anv_cmd_buffer.c
src/intel/vulkan/anv_private.h

index 299be1c..fd64598 100644 (file)
@@ -410,6 +410,12 @@ anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer)
    return list_entry(cmd_buffer->batch_bos.prev, struct anv_batch_bo, link);
 }
 
+static struct anv_batch_bo *
+anv_cmd_buffer_current_generation_batch_bo(struct anv_cmd_buffer *cmd_buffer)
+{
+   return list_entry(cmd_buffer->generation_batch_bos.prev, struct anv_batch_bo, link);
+}
+
 struct anv_address
 anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer)
 {
@@ -422,34 +428,34 @@ anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer)
 }
 
 static void
-emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer,
+emit_batch_buffer_start(struct anv_batch *batch,
                         struct anv_bo *bo, uint32_t offset)
 {
-   /* In gfx8+ the address field grew to two dwords to accommodate 48 bit
-    * offsets. The high 16 bits are in the last dword, so we can use the gfx8
-    * version in either case, as long as we set the instruction length in the
-    * header accordingly.  This means that we always emit three dwords here
-    * and all the padding and adjustment we do in this file works for all
-    * gens.
-    */
-
-   const uint32_t gfx8_length =
-      GFX8_MI_BATCH_BUFFER_START_length - GFX8_MI_BATCH_BUFFER_START_length_bias;
-
-   anv_batch_emit(&cmd_buffer->batch, GFX8_MI_BATCH_BUFFER_START, bbs) {
-      bbs.DWordLength               = gfx8_length;
+   anv_batch_emit(batch, GFX8_MI_BATCH_BUFFER_START, bbs) {
+      bbs.DWordLength               = GFX8_MI_BATCH_BUFFER_START_length -
+                                      GFX8_MI_BATCH_BUFFER_START_length_bias;
       bbs.SecondLevelBatchBuffer    = Firstlevelbatch;
       bbs.AddressSpaceIndicator     = ASI_PPGTT;
       bbs.BatchBufferStartAddress   = (struct anv_address) { bo, offset };
    }
 }
 
+enum anv_cmd_buffer_batch {
+   ANV_CMD_BUFFER_BATCH_MAIN,
+   ANV_CMD_BUFFER_BATCH_GENERATION,
+};
+
 static void
 cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer,
-                             struct anv_batch_bo *bbo)
+                             struct anv_batch_bo *bbo,
+                             enum anv_cmd_buffer_batch batch_type)
 {
-   struct anv_batch *batch = &cmd_buffer->batch;
+   struct anv_batch *batch =
+      batch_type == ANV_CMD_BUFFER_BATCH_GENERATION ?
+      &cmd_buffer->generation_batch : &cmd_buffer->batch;
    struct anv_batch_bo *current_bbo =
+      batch_type == ANV_CMD_BUFFER_BATCH_GENERATION ?
+      anv_cmd_buffer_current_generation_batch_bo(cmd_buffer) :
       anv_cmd_buffer_current_batch_bo(cmd_buffer);
 
    /* We set the end of the batch a little short so we would be sure we
@@ -459,7 +465,7 @@ cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer,
    batch->end += GFX8_MI_BATCH_BUFFER_START_length * 4;
    assert(batch->end == current_bbo->bo->map + current_bbo->bo->size);
 
-   emit_batch_buffer_start(cmd_buffer, bbo->bo, 0);
+   emit_batch_buffer_start(batch, bbo->bo, 0);
 
    anv_batch_bo_finish(current_bbo, batch);
 }
@@ -537,7 +543,7 @@ anv_cmd_buffer_chain_batch(struct anv_batch *batch, uint32_t size, void *_data)
    }
    *seen_bbo = new_bbo;
 
-   cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo);
+   cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo, ANV_CMD_BUFFER_BATCH_MAIN);
 
    list_addtail(&new_bbo->link, &cmd_buffer->batch_bos);
 
@@ -546,6 +552,46 @@ anv_cmd_buffer_chain_batch(struct anv_batch *batch, uint32_t size, void *_data)
    return VK_SUCCESS;
 }
 
+static VkResult
+anv_cmd_buffer_chain_generation_batch(struct anv_batch *batch, uint32_t size, void *_data)
+{
+   /* The caller should not need that much space. Otherwise it should split
+    * its commands.
+    */
+   assert(size <= ANV_MAX_CMD_BUFFER_BATCH_SIZE);
+
+   struct anv_cmd_buffer *cmd_buffer = _data;
+   struct anv_batch_bo *new_bbo = NULL;
+   /* Cap reallocation to chunk. */
+   uint32_t alloc_size = MIN2(
+      MAX2(batch->total_batch_size, size),
+      ANV_MAX_CMD_BUFFER_BATCH_SIZE);
+
+   VkResult result = anv_batch_bo_create(cmd_buffer, alloc_size, &new_bbo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   batch->total_batch_size += alloc_size;
+
+   struct anv_batch_bo **seen_bbo = u_vector_add(&cmd_buffer->seen_bbos);
+   if (seen_bbo == NULL) {
+      anv_batch_bo_destroy(new_bbo, cmd_buffer);
+      return vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+   *seen_bbo = new_bbo;
+
+   if (!list_is_empty(&cmd_buffer->generation_batch_bos)) {
+      cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo,
+                                   ANV_CMD_BUFFER_BATCH_GENERATION);
+   }
+
+   list_addtail(&new_bbo->link, &cmd_buffer->generation_batch_bos);
+
+   anv_batch_bo_start(new_bbo, batch, GFX8_MI_BATCH_BUFFER_START_length * 4);
+
+   return VK_SUCCESS;
+}
+
 /** Allocate a binding table
  *
  * This function allocates a binding table.  This is a bit more complicated
@@ -762,6 +808,16 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
    anv_batch_bo_start(batch_bo, &cmd_buffer->batch,
                       GFX8_MI_BATCH_BUFFER_START_length * 4);
 
+   /* Generation batch is initialized empty since it's possible it won't be
+    * used.
+    */
+   list_inithead(&cmd_buffer->generation_batch_bos);
+
+   cmd_buffer->generation_batch.alloc = &cmd_buffer->vk.pool->alloc;
+   cmd_buffer->generation_batch.user_data = cmd_buffer;
+   cmd_buffer->generation_batch.total_batch_size = 0;
+   cmd_buffer->generation_batch.extend_cb = anv_cmd_buffer_chain_generation_batch;
+
    int success = u_vector_init_pow2(&cmd_buffer->seen_bbos, 8,
                                     sizeof(struct anv_bo *));
    if (!success)
@@ -813,6 +869,12 @@ anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
       list_del(&bbo->link);
       anv_batch_bo_destroy(bbo, cmd_buffer);
    }
+   /* Also destroy all generation batch buffers */
+   list_for_each_entry_safe(struct anv_batch_bo, bbo,
+                            &cmd_buffer->generation_batch_bos, link) {
+      list_del(&bbo->link);
+      anv_batch_bo_destroy(bbo, cmd_buffer);
+   }
 }
 
 void
@@ -849,14 +911,27 @@ anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
 
    *(struct anv_batch_bo **)u_vector_add(&cmd_buffer->seen_bbos) = first_bbo;
 
-
    assert(first_bbo->bo->size == ANV_MIN_CMD_BUFFER_BATCH_SIZE);
    cmd_buffer->batch.total_batch_size = first_bbo->bo->size;
+
+   /* Delete all generation batch bos */
+   list_for_each_entry_safe(struct anv_batch_bo, bbo,
+                            &cmd_buffer->generation_batch_bos, link) {
+      list_del(&bbo->link);
+      anv_batch_bo_destroy(bbo, cmd_buffer);
+   }
+
+   /* And reset generation batch */
+   cmd_buffer->generation_batch.total_batch_size = 0;
+   cmd_buffer->generation_batch.start = NULL;
+   cmd_buffer->generation_batch.end   = NULL;
+   cmd_buffer->generation_batch.next  = NULL;
 }
 
 void
 anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
 {
+   const struct intel_device_info *devinfo = cmd_buffer->device->info;
    struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
 
    if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
@@ -878,7 +953,7 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
        */
       batch_bo->chained = anv_cmd_buffer_is_chainable(cmd_buffer);
       if (batch_bo->chained)
-         emit_batch_buffer_start(cmd_buffer, batch_bo->bo, 0);
+         emit_batch_buffer_start(&cmd_buffer->batch, batch_bo->bo, 0);
       else
          anv_batch_emit(&cmd_buffer->batch, GFX8_MI_BATCH_BUFFER_END, bbe);
 
@@ -903,7 +978,6 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
           * prefetch.
           */
          if (cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) {
-            const struct intel_device_info *devinfo = cmd_buffer->device->info;
             const enum intel_engine_class engine_class = cmd_buffer->queue_family->engine_class;
             /* Careful to have everything in signed integer. */
             int32_t prefetch_len = devinfo->engine_class_prefetch[engine_class];
@@ -951,7 +1025,7 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
          assert(cmd_buffer->batch.start == batch_bo->bo->map);
          assert(cmd_buffer->batch.end == batch_bo->bo->map + batch_bo->bo->size);
 
-         emit_batch_buffer_start(cmd_buffer, batch_bo->bo, 0);
+         emit_batch_buffer_start(&cmd_buffer->batch, batch_bo->bo, 0);
          assert(cmd_buffer->batch.start == batch_bo->bo->map);
       } else {
          cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN;
@@ -991,7 +1065,7 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
       struct anv_batch_bo *last_bbo =
          list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link);
 
-      emit_batch_buffer_start(primary, first_bbo->bo, 0);
+      emit_batch_buffer_start(&primary->batch, first_bbo->bo, 0);
 
       struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary);
       assert(primary->batch.start == this_bbo->bo->map);
@@ -1020,7 +1094,8 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
       struct anv_batch_bo *last_bbo =
          list_last_entry(&copy_list, struct anv_batch_bo, link);
 
-      cmd_buffer_chain_to_batch_bo(primary, first_bbo);
+      cmd_buffer_chain_to_batch_bo(primary, first_bbo,
+                                   ANV_CMD_BUFFER_BATCH_MAIN);
 
       list_splicetail(&copy_list, &primary->batch_bos);
 
@@ -1039,7 +1114,7 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
                          .Address = secondary->return_addr)
          + (GFX8_MI_STORE_DATA_IMM_ImmediateData_start / 8);
 
-      emit_batch_buffer_start(primary, first_bbo->bo, 0);
+      emit_batch_buffer_start(&primary->batch, first_bbo->bo, 0);
 
       *write_return_addr =
          anv_address_physical(anv_batch_address(&primary->batch,
index 7b3dc75..77df27c 100644 (file)
@@ -103,6 +103,7 @@ anv_create_cmd_buffer(struct vk_command_pool *pool,
       &cmd_buffer->state.gfx.sample_locations;
 
    cmd_buffer->batch.status = VK_SUCCESS;
+   cmd_buffer->generation_batch.status = VK_SUCCESS;
 
    cmd_buffer->device = device;
 
@@ -128,6 +129,8 @@ anv_create_cmd_buffer(struct vk_command_pool *pool,
 
    cmd_buffer->self_mod_locations = NULL;
 
+   cmd_buffer->generation_return_addr = ANV_NULL_ADDRESS;
+
    anv_cmd_state_init(cmd_buffer);
 
    anv_measure_init(cmd_buffer);
index 889393c..99afcee 100644 (file)
@@ -2681,6 +2681,7 @@ struct anv_cmd_buffer {
    struct anv_device *                          device;
    struct anv_queue_family *                    queue_family;
 
+   /** Batch where the main commands live */
    struct anv_batch                             batch;
 
    /* Pointer to the location in the batch where MI_BATCH_BUFFER_END was
@@ -2753,6 +2754,28 @@ struct anv_cmd_buffer {
     */
    uint32_t                                     total_batch_size;
 
+   /** Batch generating part of the anv_cmd_buffer::batch */
+   struct anv_batch                             generation_batch;
+
+   /**
+    * Location in anv_cmd_buffer::batch at which we left some space to insert
+    * a MI_BATCH_BUFFER_START into the generation_batch if needed.
+    */
+   struct anv_address                           generation_jump_addr;
+
+   /**
+    * Location in anv_cmd_buffer::batch at which the generation batch should
+    * jump back to.
+    */
+   struct anv_address                           generation_return_addr;
+
+   /** List of anv_batch_bo used for generation
+    *
+    * We have to keep this separated of the anv_cmd_buffer::batch_bos that is
+    * used for a chaining optimization.
+    */
+   struct list_head                             generation_batch_bos;
+
    /**
     * A vector of anv_bo pointers for chunks of memory used by the command
     * buffer that are too large to be allocated through dynamic_state_stream.