anv: implement INTEL_DEBUG=submit
author Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Wed, 17 Jun 2020 12:37:33 +0000 (15:37 +0300)
committer Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Thu, 4 Mar 2021 17:46:24 +0000 (19:46 +0200)
Name all the BOs!

v2: Fix 32bit build issue (Thanks Marge!)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Marcin Ślusarz <marcin.slusarz@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5736>

14 files changed:
src/intel/vulkan/anv_allocator.c
src/intel/vulkan/anv_batch_chain.c
src/intel/vulkan/anv_descriptor_set.c
src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_measure.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/anv_queue.c
src/intel/vulkan/genX_query.c
src/intel/vulkan/tests/block_pool_grow_first.c
src/intel/vulkan/tests/block_pool_no_free.c
src/intel/vulkan/tests/state_pool.c
src/intel/vulkan/tests/state_pool_free_list_only.c
src/intel/vulkan/tests/state_pool_no_free.c
src/intel/vulkan/tests/state_pool_padding.c

index 01a6b0b..95e3fc3 100644 (file)
@@ -364,11 +364,13 @@ anv_block_pool_expand_range(struct anv_block_pool *pool,
 VkResult
 anv_block_pool_init(struct anv_block_pool *pool,
                     struct anv_device *device,
+                    const char *name,
                     uint64_t start_address,
                     uint32_t initial_size)
 {
    VkResult result;
 
+   pool->name = name;
    pool->device = device;
    pool->use_softpin = device->physical->use_softpin;
    pool->nbos = 0;
@@ -495,7 +497,9 @@ anv_block_pool_expand_range(struct anv_block_pool *pool,
       uint32_t new_bo_size = size - pool->size;
       struct anv_bo *new_bo;
       assert(center_bo_offset == 0);
-      VkResult result = anv_device_alloc_bo(pool->device, new_bo_size,
+      VkResult result = anv_device_alloc_bo(pool->device,
+                                            pool->name,
+                                            new_bo_size,
                                             bo_alloc_flags |
                                             ANV_BO_ALLOC_FIXED_ADDRESS |
                                             ANV_BO_ALLOC_MAPPED |
@@ -823,6 +827,7 @@ anv_block_pool_alloc_back(struct anv_block_pool *pool,
 VkResult
 anv_state_pool_init(struct anv_state_pool *pool,
                     struct anv_device *device,
+                    const char *name,
                     uint64_t base_address,
                     int32_t start_offset,
                     uint32_t block_size)
@@ -830,7 +835,7 @@ anv_state_pool_init(struct anv_state_pool *pool,
    /* We don't want to ever see signed overflow */
    assert(start_offset < INT32_MAX - (int32_t)BLOCK_POOL_MEMFD_SIZE);
 
-   VkResult result = anv_block_pool_init(&pool->block_pool, device,
+   VkResult result = anv_block_pool_init(&pool->block_pool, device, name,
                                          base_address + start_offset,
                                          block_size * 16);
    if (result != VK_SUCCESS)
@@ -1312,8 +1317,10 @@ anv_state_reserved_pool_free(struct anv_state_reserved_pool *pool,
 }
 
 void
-anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device)
+anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
+                 const char *name)
 {
+   pool->name = name;
    pool->device = device;
    for (unsigned i = 0; i < ARRAY_SIZE(pool->free_list); i++) {
       util_sparse_array_free_list_init(&pool->free_list[i],
@@ -1361,6 +1368,7 @@ anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
    }
 
    VkResult result = anv_device_alloc_bo(pool->device,
+                                         pool->name,
                                          pow2_size,
                                          ANV_BO_ALLOC_MAPPED |
                                          ANV_BO_ALLOC_SNOOPED |
@@ -1525,7 +1533,7 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
     *
     * so nothing will ever touch the top page.
     */
-   VkResult result = anv_device_alloc_bo(device, size,
+   VkResult result = anv_device_alloc_bo(device, "scratch", size,
                                          ANV_BO_ALLOC_32BIT_ADDRESS,
                                          0 /* explicit_address */,
                                          &bo);
@@ -1611,6 +1619,7 @@ anv_device_get_bo_align(struct anv_device *device,
 
 VkResult
 anv_device_alloc_bo(struct anv_device *device,
+                    const char *name,
                     uint64_t size,
                     enum anv_bo_alloc_flags alloc_flags,
                     uint64_t explicit_address,
@@ -1644,6 +1653,7 @@ anv_device_alloc_bo(struct anv_device *device,
       return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
 
    struct anv_bo new_bo = {
+      .name = name,
       .gem_handle = gem_handle,
       .refcount = 1,
       .offset = -1,
@@ -1779,6 +1789,7 @@ anv_device_import_bo_from_host_ptr(struct anv_device *device,
       __sync_fetch_and_add(&bo->refcount, 1);
    } else {
       struct anv_bo new_bo = {
+         .name = "host-ptr",
          .gem_handle = gem_handle,
          .refcount = 1,
          .offset = -1,
@@ -1913,6 +1924,7 @@ anv_device_import_bo(struct anv_device *device,
       }
 
       struct anv_bo new_bo = {
+         .name = "imported",
          .gem_handle = gem_handle,
          .refcount = 1,
          .offset = -1,
index 7b7d6ed..724ed68 100644 (file)
@@ -1911,6 +1911,16 @@ anv_queue_execbuf_locked(struct anv_queue *queue,
       submit->cmd_buffer_count &&
       submit->perf_query_pool;
 
+   if (INTEL_DEBUG & DEBUG_SUBMIT) {
+      fprintf(stderr, "Batch on queue 0\n");
+      for (uint32_t i = 0; i < execbuf.bo_count; i++) {
+         const struct anv_bo *bo = execbuf.bos[i];
+
+         fprintf(stderr, "   BO: addr=0x%016"PRIx64" size=%010"PRIx64" handle=%05u name=%s\n",
+                 bo->offset, bo->size, bo->gem_handle, bo->name);
+      }
+   }
+
    if (INTEL_DEBUG & DEBUG_BATCH) {
       fprintf(stderr, "Batch on queue %d\n", (int)(queue - device->queues));
       if (submit->cmd_buffer_count) {
index a8d91b3..3d2dca1 100644 (file)
@@ -867,6 +867,7 @@ VkResult anv_CreateDescriptorPool(
 
    if (descriptor_bo_size > 0) {
       VkResult result = anv_device_alloc_bo(device,
+                                            "descriptors",
                                             descriptor_bo_size,
                                             ANV_BO_ALLOC_MAPPED |
                                             ANV_BO_ALLOC_SNOOPED,
index bbf5147..66209da 100644 (file)
@@ -2672,7 +2672,7 @@ anv_device_init_border_colors(struct anv_device *device)
 static VkResult
 anv_device_init_trivial_batch(struct anv_device *device)
 {
-   VkResult result = anv_device_alloc_bo(device, 4096,
+   VkResult result = anv_device_alloc_bo(device, "trivial-batch", 4096,
                                          ANV_BO_ALLOC_MAPPED,
                                          0 /* explicit_address */,
                                          &device->trivial_batch_bo);
@@ -2714,7 +2714,7 @@ vk_priority_to_gen(int priority)
 static VkResult
 anv_device_init_hiz_clear_value_bo(struct anv_device *device)
 {
-   VkResult result = anv_device_alloc_bo(device, 4096,
+   VkResult result = anv_device_alloc_bo(device, "hiz-clear-value", 4096,
                                          ANV_BO_ALLOC_MAPPED,
                                          0 /* explicit_address */,
                                          &device->hiz_clear_bo);
@@ -3096,18 +3096,20 @@ VkResult anv_CreateDevice(
    if (result != VK_SUCCESS)
       goto fail_queue_cond;
 
-   anv_bo_pool_init(&device->batch_bo_pool, device);
+   anv_bo_pool_init(&device->batch_bo_pool, device, "batch");
 
    /* Because scratch is also relative to General State Base Address, we leave
     * the base address 0 and start the pool memory at an offset.  This way we
     * get the correct offsets in the anv_states that get allocated from it.
     */
    result = anv_state_pool_init(&device->general_state_pool, device,
+                                "general pool",
                                 0, GENERAL_STATE_POOL_MIN_ADDRESS, 16384);
    if (result != VK_SUCCESS)
       goto fail_batch_bo_pool;
 
    result = anv_state_pool_init(&device->dynamic_state_pool, device,
+                                "dynamic pool",
                                 DYNAMIC_STATE_POOL_MIN_ADDRESS, 0, 16384);
    if (result != VK_SUCCESS)
       goto fail_general_state_pool;
@@ -3126,11 +3128,13 @@ VkResult anv_CreateDevice(
    }
 
    result = anv_state_pool_init(&device->instruction_state_pool, device,
+                                "instruction pool",
                                 INSTRUCTION_STATE_POOL_MIN_ADDRESS, 0, 16384);
    if (result != VK_SUCCESS)
       goto fail_dynamic_state_pool;
 
    result = anv_state_pool_init(&device->surface_state_pool, device,
+                                "surface state pool",
                                 SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
    if (result != VK_SUCCESS)
       goto fail_instruction_state_pool;
@@ -3140,6 +3144,7 @@ VkResult anv_CreateDevice(
                                (int64_t)SURFACE_STATE_POOL_MIN_ADDRESS;
       assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
       result = anv_state_pool_init(&device->binding_table_pool, device,
+                                   "binding table pool",
                                    SURFACE_STATE_POOL_MIN_ADDRESS,
                                    bt_pool_offset, 4096);
       if (result != VK_SUCCESS)
@@ -3153,7 +3158,7 @@ VkResult anv_CreateDevice(
          goto fail_binding_table_pool;
    }
 
-   result = anv_device_alloc_bo(device, 4096,
+   result = anv_device_alloc_bo(device, "workaround", 4096,
                                 ANV_BO_ALLOC_CAPTURE | ANV_BO_ALLOC_MAPPED /* flags */,
                                 0 /* explicit_address */,
                                 &device->workaround_bo);
@@ -3822,7 +3827,7 @@ VkResult anv_AllocateMemory(
 
    /* Regular allocate (not importing memory). */
 
-   result = anv_device_alloc_bo(device, pAllocateInfo->allocationSize,
+   result = anv_device_alloc_bo(device, "user", pAllocateInfo->allocationSize,
                                 alloc_flags, client_address, &mem->bo);
    if (result != VK_SUCCESS)
       goto fail;
index 6fda408..28e3b96 100644 (file)
@@ -114,7 +114,7 @@ anv_measure_init(struct anv_cmd_buffer *cmd_buffer)
 
    memset(measure, 0, batch_bytes);
    VkResult result =
-      anv_device_alloc_bo(device,
+      anv_device_alloc_bo(device, "measure data",
                           config->batch_size * sizeof(uint64_t),
                           ANV_BO_ALLOC_MAPPED,
                           0,
@@ -327,7 +327,7 @@ anv_measure_reset(struct anv_cmd_buffer *cmd_buffer)
 
    anv_device_release_bo(device, measure->bo);
    VkResult result =
-      anv_device_alloc_bo(device,
+      anv_device_alloc_bo(device, "measure data",
                           config->batch_size * sizeof(uint64_t),
                           ANV_BO_ALLOC_MAPPED,
                           0,
index fe06c82..d25d8a8 100644 (file)
@@ -553,6 +553,8 @@ anv_multialloc_alloc2(struct anv_multialloc *ma,
 }
 
 struct anv_bo {
+   const char *name;
+
    uint32_t gem_handle;
 
    uint32_t refcount;
@@ -688,6 +690,8 @@ struct anv_block_state {
 #define ANV_MAX_BLOCK_POOL_BOS 20
 
 struct anv_block_pool {
+   const char *name;
+
    struct anv_device *device;
    bool use_softpin;
 
@@ -835,6 +839,7 @@ struct anv_state_stream {
  */
 VkResult anv_block_pool_init(struct anv_block_pool *pool,
                              struct anv_device *device,
+                             const char *name,
                              uint64_t start_address,
                              uint32_t initial_size);
 void anv_block_pool_finish(struct anv_block_pool *pool);
@@ -847,6 +852,7 @@ size);
 
 VkResult anv_state_pool_init(struct anv_state_pool *pool,
                              struct anv_device *device,
+                             const char *name,
                              uint64_t base_address,
                              int32_t start_offset,
                              uint32_t block_size);
@@ -894,12 +900,15 @@ anv_state_table_get(struct anv_state_table *table, uint32_t idx)
  * of block_pool except that each block is its own BO.
  */
 struct anv_bo_pool {
+   const char *name;
+
    struct anv_device *device;
 
    struct util_sparse_array_free_list free_list[16];
 };
 
-void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device);
+void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
+                      const char *name);
 void anv_bo_pool_finish(struct anv_bo_pool *pool);
 VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
                            struct anv_bo **bo_out);
@@ -1450,7 +1459,8 @@ enum anv_bo_alloc_flags {
    ANV_BO_ALLOC_IMPLICIT_CCS = (1 << 9),
 };
 
-VkResult anv_device_alloc_bo(struct anv_device *device, uint64_t size,
+VkResult anv_device_alloc_bo(struct anv_device *device,
+                             const char *name, uint64_t size,
                              enum anv_bo_alloc_flags alloc_flags,
                              uint64_t explicit_address,
                              struct anv_bo **bo);
index e37f46a..c094841 100644 (file)
@@ -173,7 +173,7 @@ anv_timeline_add_point_locked(struct anv_device *device,
       if (!(*point))
          result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
       if (result == VK_SUCCESS) {
-         result = anv_device_alloc_bo(device, 4096,
+         result = anv_device_alloc_bo(device, "timeline-semaphore", 4096,
                                       ANV_BO_ALLOC_EXTERNAL |
                                       ANV_BO_ALLOC_IMPLICIT_SYNC,
                                       0 /* explicit_address */,
@@ -809,7 +809,7 @@ anv_queue_submit_simple_batch(struct anv_queue *queue,
       result = anv_queue_submit_add_syncobj(submit, device, syncobj,
                                             I915_EXEC_FENCE_SIGNAL, 0);
    } else {
-      result = anv_device_alloc_bo(device, 4096,
+      result = anv_device_alloc_bo(device, "simple-batch-sync", 4096,
                                    ANV_BO_ALLOC_EXTERNAL |
                                    ANV_BO_ALLOC_IMPLICIT_SYNC,
                                    0 /* explicit_address */,
@@ -2175,7 +2175,7 @@ binary_semaphore_create(struct anv_device *device,
    } else {
       impl->type = ANV_SEMAPHORE_TYPE_BO;
       VkResult result =
-         anv_device_alloc_bo(device, 4096,
+         anv_device_alloc_bo(device, "binary-semaphore", 4096,
                              ANV_BO_ALLOC_EXTERNAL |
                              ANV_BO_ALLOC_IMPLICIT_SYNC,
                              0 /* explicit_address */,
index a39dc06..09cac22 100644 (file)
@@ -206,7 +206,7 @@ VkResult genX(CreateQueryPool)(
       bo_flags |= EXEC_OBJECT_ASYNC;
 
    uint64_t size = pool->slots * pool->stride;
-   result = anv_device_alloc_bo(device, size,
+   result = anv_device_alloc_bo(device, "query-pool", size,
                                 ANV_BO_ALLOC_MAPPED |
                                 ANV_BO_ALLOC_SNOOPED,
                                 0 /* explicit_address */,
index 7c66157..e50f65c 100644 (file)
@@ -42,7 +42,7 @@ int main(void)
 
    pthread_mutex_init(&device.mutex, NULL);
    anv_bo_cache_init(&device.bo_cache);
-   anv_block_pool_init(&pool, &device, 4096, initial_size);
+   anv_block_pool_init(&pool, &device, "test", 4096, initial_size);
    ASSERT(pool.size == initial_size);
 
    uint32_t padding;
index 9b5b77f..37030bd 100644 (file)
@@ -118,7 +118,7 @@ static void run_test()
 
    pthread_mutex_init(&device.mutex, NULL);
    anv_bo_cache_init(&device.bo_cache);
-   anv_block_pool_init(&pool, &device, 4096, 4096);
+   anv_block_pool_init(&pool, &device, "test", 4096, 4096);
 
    for (unsigned i = 0; i < NUM_THREADS; i++) {
       jobs[i].pool = &pool;
index 0c9f5d3..2f54efe 100644 (file)
@@ -45,7 +45,7 @@ int main(void)
    anv_bo_cache_init(&device.bo_cache);
 
    for (unsigned i = 0; i < NUM_RUNS; i++) {
-      anv_state_pool_init(&state_pool, &device, 4096, 0, 256);
+      anv_state_pool_init(&state_pool, &device, "test", 4096, 0, 256);
 
       /* Grab one so a zero offset is impossible */
       anv_state_pool_alloc(&state_pool, 16, 16);
index fd471a7..1931698 100644 (file)
@@ -42,7 +42,7 @@ int main(void)
 
    pthread_mutex_init(&device.mutex, NULL);
    anv_bo_cache_init(&device.bo_cache);
-   anv_state_pool_init(&state_pool, &device, 4096, 0, 4096);
+   anv_state_pool_init(&state_pool, &device, "test", 4096, 0, 4096);
 
    /* Grab one so a zero offset is impossible */
    anv_state_pool_alloc(&state_pool, 16, 16);
index 8713640..4288e1a 100644 (file)
@@ -63,7 +63,7 @@ static void run_test()
 
    pthread_mutex_init(&device.mutex, NULL);
    anv_bo_cache_init(&device.bo_cache);
-   anv_state_pool_init(&state_pool, &device, 4096, 0, 64);
+   anv_state_pool_init(&state_pool, &device, "test", 4096, 0, 64);
 
    pthread_barrier_init(&barrier, NULL, NUM_THREADS);
 
index 182b370..70fb773 100644 (file)
@@ -36,7 +36,7 @@ int main(void)
 
    pthread_mutex_init(&device.mutex, NULL);
    anv_bo_cache_init(&device.bo_cache);
-   anv_state_pool_init(&state_pool, &device, 4096, 0, 4096);
+   anv_state_pool_init(&state_pool, &device, "test", 4096, 0, 4096);
 
    /* Get the size of the underlying block_pool */
    struct anv_block_pool *bp = &state_pool.block_pool;