renderpass_key_equals);
u_rwlock_init(&at->ht_lock);
- at->results_bo = malloc(sizeof(struct tu_bo));
- result = tu_bo_init_new(dev, at->results_bo,
+ result = tu_bo_init_new(dev, &at->results_bo,
sizeof(struct tu_autotune_results),
TU_BO_ALLOC_NO_FLAGS);
if (result != VK_SUCCESS) {
tu_bo_finish(dev, at->results_bo);
fail_bo:
- free(at->results_bo);
u_rwlock_destroy(&at->ht_lock);
_mesa_hash_table_destroy(at->ht, NULL);
_mesa_hash_table_destroy(at->ht, NULL);
u_rwlock_destroy(&at->ht_lock);
tu_bo_finish(dev, at->results_bo);
- free(at->results_bo);
}
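With tu_bo_init_new() now returning a pointer into device-owned storage, callers drop their malloc/free pairs entirely, as the autotune hunk above shows. A minimal sketch of the new calling convention (hypothetical caller; names from this diff):

```c
struct tu_bo *bo;
VkResult result = tu_bo_init_new(dev, &bo, size, TU_BO_ALLOC_NO_FLAGS);
if (result != VK_SUCCESS)
   return result;        /* nothing to free: storage lives in dev->bo_map */

/* ... map with tu_bo_map(dev, bo), then use bo->iova / bo->map ... */

tu_bo_finish(dev, bo);   /* drops the ref; a zeroed entry marks the slot free */
```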
bool
struct ir3_shader_variant *so =
ir3_shader_get_variant(sh, &key, false, false, &created);
- struct tu6_global *global = dev->global_bo.map;
+ struct tu6_global *global = dev->global_bo->map;
assert(*offset + so->info.sizedwords <= ARRAY_SIZE(global->shaders));
dev->global_shaders[idx] = so;
memcpy(&global->shaders[*offset], so->bin,
sizeof(uint32_t) * so->info.sizedwords);
- dev->global_shader_va[idx] = dev->global_bo.iova +
+ dev->global_shader_va[idx] = dev->global_bo->iova +
gb_offset(shaders[*offset]);
*offset += align(so->info.sizedwords, 32);
}
assert(cmd->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
- tu_cs_emit_regs(&cmd->cs, A6XX_PC_TESSFACTOR_ADDR(.qword = cmd->device->tess_bo.iova));
+ tu_cs_emit_regs(&cmd->cs, A6XX_PC_TESSFACTOR_ADDR(.qword = cmd->device->tess_bo->iova));
cmd->state.tessfactor_addr_set = true;
}
tu_disable_draw_states(cmd, cs);
tu_cs_emit_regs(cs,
- A6XX_SP_TP_BORDER_COLOR_BASE_ADDR(.bo = &dev->global_bo,
+ A6XX_SP_TP_BORDER_COLOR_BASE_ADDR(.bo = dev->global_bo,
.bo_offset = gb_offset(bcolor_builtin)));
tu_cs_emit_regs(cs,
- A6XX_SP_PS_TP_BORDER_COLOR_BASE_ADDR(.bo = &dev->global_bo,
+ A6XX_SP_PS_TP_BORDER_COLOR_BASE_ADDR(.bo = dev->global_bo,
.bo_offset = gb_offset(bcolor_builtin)));
/* VSC buffers:
*/
mtx_lock(&dev->mutex);
- struct tu6_global *global = dev->global_bo.map;
+ struct tu6_global *global = dev->global_bo->map;
uint32_t vsc_draw_overflow = global->vsc_draw_overflow;
uint32_t vsc_prim_overflow = global->vsc_prim_overflow;
tu_cs_emit_pkt7(cs, CP_WAIT_REG_MEM, 6);
tu_cs_emit(cs, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) |
CP_WAIT_REG_MEM_0_POLL_MEMORY);
- tu_cs_emit_qw(cs, event->bo.iova); /* POLL_ADDR_LO/HI */
+ tu_cs_emit_qw(cs, event->bo->iova); /* POLL_ADDR_LO/HI */
tu_cs_emit(cs, CP_WAIT_REG_MEM_3_REF(1));
tu_cs_emit(cs, CP_WAIT_REG_MEM_4_MASK(~0u));
tu_cs_emit(cs, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(20));
if (!(stageMask & ~top_of_pipe_flags)) {
tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 3);
- tu_cs_emit_qw(cs, event->bo.iova); /* ADDR_LO/HI */
+ tu_cs_emit_qw(cs, event->bo->iova); /* ADDR_LO/HI */
tu_cs_emit(cs, value);
} else {
/* Use a RB_DONE_TS event to wait for everything to complete. */
tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 4);
tu_cs_emit(cs, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS));
- tu_cs_emit_qw(cs, event->bo.iova);
+ tu_cs_emit_qw(cs, event->bo->iova);
tu_cs_emit(cs, value);
}
}
{
for (uint32_t i = 0; i < cs->bo_count; ++i) {
tu_bo_finish(cs->device, cs->bos[i]);
- free(cs->bos[i]);
}
free(cs->entries);
cs->bos = new_bos;
}
- struct tu_bo *new_bo = malloc(sizeof(struct tu_bo));
- if (!new_bo)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
+ struct tu_bo *new_bo;
VkResult result =
- tu_bo_init_new(cs->device, new_bo, size * sizeof(uint32_t),
+ tu_bo_init_new(cs->device, &new_bo, size * sizeof(uint32_t),
TU_BO_ALLOC_GPU_READ_ONLY | TU_BO_ALLOC_ALLOW_DUMP);
if (result != VK_SUCCESS) {
- free(new_bo);
result = tu_bo_map(cs->device, new_bo);
if (result != VK_SUCCESS) {
tu_bo_finish(cs->device, new_bo);
- free(new_bo);
return result;
}
for (uint32_t i = 0; i + 1 < cs->bo_count; ++i) {
tu_bo_finish(cs->device, cs->bos[i]);
- free(cs->bos[i]);
}
if (cs->bo_count) {
static inline uint8_t *
pool_base(struct tu_descriptor_pool *pool)
{
- return pool->host_bo ?: pool->bo.map;
+ return pool->host_bo ?: pool->bo->map;
}
static uint32_t
* resets via the pool. */
if (pool->current_offset + layout_size <= pool->size) {
set->mapped_ptr = (uint32_t*)(pool_base(pool) + pool->current_offset);
- set->va = pool->host_bo ? 0 : pool->bo.iova + pool->current_offset;
+ set->va = pool->host_bo ? 0 : pool->bo->iova + pool->current_offset;
if (!pool->host_memory_base) {
pool->entries[pool->entry_count].offset = pool->current_offset;
}
set->mapped_ptr = (uint32_t*)(pool_base(pool) + offset);
- set->va = pool->host_bo ? 0 : pool->bo.iova + offset;
+ set->va = pool->host_bo ? 0 : pool->bo->iova + offset;
memmove(&pool->entries[index + 1], &pool->entries[index],
sizeof(pool->entries[0]) * (pool->entry_count - index));
if (ret)
goto fail_alloc;
- ret = tu_bo_map(device, &pool->bo);
+ ret = tu_bo_map(device, pool->bo);
if (ret)
goto fail_map;
} else {
return VK_SUCCESS;
fail_map:
- tu_bo_finish(device, &pool->bo);
+ tu_bo_finish(device, pool->bo);
fail_alloc:
vk_object_free(&device->vk, pAllocator, pool);
return ret;
if (pool->host_bo)
vk_free2(&device->vk.alloc, pAllocator, pool->host_bo);
else
- tu_bo_finish(device, &pool->bo);
+ tu_bo_finish(device, pool->bo);
}
vk_object_free(&device->vk, pAllocator, pool);
struct tu_device *device =
container_of(utctx, struct tu_device, trace_context);
- struct tu_bo *bo = ralloc(NULL, struct tu_bo);
- tu_bo_init_new(device, bo, size, false);
+ struct tu_bo *bo;
+ tu_bo_init_new(device, &bo, size, false);
return bo;
}
struct tu_bo *bo = timestamps;
tu_bo_finish(device, bo);
- ralloc_free(bo);
}
static void
device->fd = physical_device->local_fd;
mtx_init(&device->bo_mutex, mtx_plain);
+ u_rwlock_init(&device->dma_bo_lock);
pthread_mutex_init(&device->submit_mutex, NULL);
#ifndef TU_USE_KGSL
goto fail_queues;
}
+ /* Initialize the sparse array that holds our refcounted BO structs */
+ util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);
+
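The container semantics this patch relies on: util_sparse_array hands out stable, zero-filled entries keyed by GEM handle, so no grow/re-index step is ever needed. A short sketch (gem_handle is a placeholder):

```c
struct tu_bo *bo =
   (struct tu_bo *)util_sparse_array_get(&device->bo_map, gem_handle);
if (bo->gem_handle == 0) {
   /* entry was never initialized (or was finished): the slot is free */
}
```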
/* initial sizes, these will increase if there is overflow */
device->vsc_draw_strm_pitch = 0x1000 + VSC_PAD;
device->vsc_prim_strm_pitch = 0x4000 + VSC_PAD;
goto fail_global_bo;
}
- result = tu_bo_map(device, &device->global_bo);
+ result = tu_bo_map(device, device->global_bo);
if (result != VK_SUCCESS) {
vk_startup_errorf(device->instance, result, "BO map");
goto fail_global_bo_map;
}
- struct tu6_global *global = device->global_bo.map;
+ struct tu6_global *global = device->global_bo->map;
tu_init_clear_blit_shaders(device);
global->predicate = 0;
tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK],
fail_pipeline_cache:
tu_destroy_clear_blit_shaders(device);
fail_global_bo_map:
- tu_bo_finish(device, &device->global_bo);
- vk_free(&device->vk.alloc, device->bo_idx);
+ tu_bo_finish(device, device->global_bo);
vk_free(&device->vk.alloc, device->bo_list);
fail_global_bo:
ir3_compiler_destroy(device->compiler);
+ util_sparse_array_finish(&device->bo_map);
fail_queues:
for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
vk_free(&device->vk.alloc, device->queues[i]);
}
+ u_rwlock_destroy(&device->dma_bo_lock);
vk_device_finish(&device->vk);
vk_free(&device->vk.alloc, device);
return result;
for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) {
if (device->scratch_bos[i].initialized)
- tu_bo_finish(device, &device->scratch_bos[i].bo);
+ tu_bo_finish(device, device->scratch_bos[i].bo);
}
tu_destroy_clear_blit_shaders(device);
tu_autotune_fini(&device->autotune, device);
+ util_sparse_array_finish(&device->bo_map);
+ u_rwlock_destroy(&device->dma_bo_lock);
+
pthread_cond_destroy(&device->timeline_cond);
vk_free(&device->vk.alloc, device->bo_list);
- vk_free(&device->vk.alloc, device->bo_idx);
vk_device_finish(&device->vk);
vk_free(&device->vk.alloc, device);
}
for (unsigned i = index; i < ARRAY_SIZE(dev->scratch_bos); i++) {
if (p_atomic_read(&dev->scratch_bos[i].initialized)) {
/* Fast path: just return the already-allocated BO. */
- *bo = &dev->scratch_bos[i].bo;
+ *bo = dev->scratch_bos[i].bo;
return VK_SUCCESS;
}
}
*/
if (dev->scratch_bos[index].initialized) {
mtx_unlock(&dev->scratch_bos[index].construct_mtx);
- *bo = &dev->scratch_bos[index].bo;
+ *bo = dev->scratch_bos[index].bo;
return VK_SUCCESS;
}
mtx_unlock(&dev->scratch_bos[index].construct_mtx);
- *bo = &dev->scratch_bos[index].bo;
+ *bo = dev->scratch_bos[index].bo;
return VK_SUCCESS;
}
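These scratch-BO hunks only swap the embedded struct for the sparse-array pointer; the surrounding double-checked locking is unchanged. For context, a sketch of the slow path under construct_mtx (fields from this diff; the size computation is illustrative):

```c
mtx_lock(&dev->scratch_bos[index].construct_mtx);
if (!dev->scratch_bos[index].initialized) {
   VkResult result =
      tu_bo_init_new(dev, &dev->scratch_bos[index].bo,
                     1ull << (index + MIN_SCRATCH_BO_SIZE_LOG2), /* illustrative */
                     TU_BO_ALLOC_NO_FLAGS);
   if (result == VK_SUCCESS)
      p_atomic_set(&dev->scratch_bos[index].initialized, true);
}
mtx_unlock(&dev->scratch_bos[index].construct_mtx);
*bo = dev->scratch_bos[index].bo;
```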
if (result == VK_SUCCESS) {
- mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo.size);
+ mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
if (mem_heap_used > mem_heap->size) {
- p_atomic_add(&mem_heap->used, -mem->bo.size);
- tu_bo_finish(device, &mem->bo);
+ p_atomic_add(&mem_heap->used, -mem->bo->size);
+ tu_bo_finish(device, mem->bo);
result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
"Out of heap memory");
}
if (mem == NULL)
return;
- p_atomic_add(&device->physical_device->heap.used, -mem->bo.size);
- tu_bo_finish(device, &mem->bo);
+ p_atomic_add(&device->physical_device->heap.used, -mem->bo->size);
+ tu_bo_finish(device, mem->bo);
vk_object_free(&device->vk, pAllocator, mem);
}
return VK_SUCCESS;
}
- if (!mem->bo.map) {
- result = tu_bo_map(device, &mem->bo);
+ if (!mem->bo->map) {
+ result = tu_bo_map(device, mem->bo);
if (result != VK_SUCCESS)
return result;
}
- *ppData = mem->bo.map + offset;
+ *ppData = mem->bo->map + offset;
return VK_SUCCESS;
}
TU_FROM_HANDLE(tu_buffer, buffer, pBindInfos[i].buffer);
if (mem) {
- buffer->bo = &mem->bo;
- buffer->iova = mem->bo.iova + pBindInfos[i].memoryOffset;
+ buffer->bo = mem->bo;
+ buffer->iova = mem->bo->iova + pBindInfos[i].memoryOffset;
} else {
buffer->bo = NULL;
}
TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory);
if (mem) {
- image->bo = &mem->bo;
- image->iova = mem->bo.iova + pBindInfos[i].memoryOffset;
+ image->bo = mem->bo;
+ image->iova = mem->bo->iova + pBindInfos[i].memoryOffset;
} else {
image->bo = NULL;
image->iova = 0;
if (result != VK_SUCCESS)
goto fail_alloc;
- result = tu_bo_map(device, &event->bo);
+ result = tu_bo_map(device, event->bo);
if (result != VK_SUCCESS)
goto fail_map;
return VK_SUCCESS;
fail_map:
- tu_bo_finish(device, &event->bo);
+ tu_bo_finish(device, event->bo);
fail_alloc:
vk_object_free(&device->vk, pAllocator, event);
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (!event)
return;
- tu_bo_finish(device, &event->bo);
+ tu_bo_finish(device, event->bo);
vk_object_free(&device->vk, pAllocator, event);
}
{
TU_FROM_HANDLE(tu_event, event, _event);
- if (*(uint64_t*) event->bo.map == 1)
+ if (*(uint64_t*) event->bo->map == 1)
return VK_EVENT_SET;
return VK_EVENT_RESET;
}
tu_SetEvent(VkDevice _device, VkEvent _event)
{
TU_FROM_HANDLE(tu_event, event, _event);
- *(uint64_t*) event->bo.map = 1;
+ *(uint64_t*) event->bo->map = 1;
return VK_SUCCESS;
}
tu_ResetEvent(VkDevice _device, VkEvent _event)
{
TU_FROM_HANDLE(tu_event, event, _event);
- *(uint64_t*) event->bo.map = 0;
+ *(uint64_t*) event->bo->map = 0;
return VK_SUCCESS;
}
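An event is a single uint64_t slot in its own BO: the host pokes it through the CPU mapping (above), while the GPU writes the same address with CP_MEM_WRITE or an RB_DONE_TS timestamp (earlier hunk). A hypothetical polling helper to make the contract explicit:

```c
static inline bool
tu_event_is_set(const struct tu_event *event)
{
   /* matches tu_SetEvent()'s write of 1 / tu_ResetEvent()'s write of 0 */
   return *(volatile uint64_t *)event->bo->map == 1;
}
```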
border_color = BITSET_FFS(device->custom_border_color);
BITSET_CLEAR(device->custom_border_color, border_color);
mtx_unlock(&device->mutex);
- tu6_pack_border_color(device->global_bo.map + gb_offset(bcolor[border_color]),
+ tu6_pack_border_color(device->global_bo->map + gb_offset(bcolor[border_color]),
&custom_border_color->customBorderColor,
pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT);
border_color += TU_BORDER_COLOR_BUILTIN;
pGetFdInfo->handleType ==
VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
- int prime_fd = tu_bo_export_dmabuf(device, &memory->bo);
+ int prime_fd = tu_bo_export_dmabuf(device, memory->bo);
if (prime_fd < 0)
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
}
- *bo = (struct tu_bo) {
- .gem_handle = gem_handle,
- .size = size,
- .iova = iova,
- };
-
mtx_lock(&dev->bo_mutex);
uint32_t idx = dev->bo_count++;
dev->bo_list_size = new_len;
}
- /* grow the "bo idx" list (maps gem handles to index in the bo list) */
- if (bo->gem_handle >= dev->bo_idx_size) {
- uint32_t new_len = bo->gem_handle + 256;
- uint32_t *new_ptr =
- vk_realloc(&dev->vk.alloc, dev->bo_idx, new_len * sizeof(*dev->bo_idx),
- 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
- if (!new_ptr)
- goto fail_bo_idx;
-
- dev->bo_idx = new_ptr;
- dev->bo_idx_size = new_len;
- }
-
- dev->bo_idx[bo->gem_handle] = idx;
dev->bo_list[idx] = (struct drm_msm_gem_submit_bo) {
.flags = MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE |
COND(dump, MSM_SUBMIT_BO_DUMP),
.handle = gem_handle,
.presumed = iova,
};
+
+ *bo = (struct tu_bo) {
+ .gem_handle = gem_handle,
+ .size = size,
+ .iova = iova,
+ .refcnt = 1,
+ .bo_list_idx = idx,
+ };
+
mtx_unlock(&dev->bo_mutex);
return VK_SUCCESS;
-fail_bo_idx:
- vk_free(&dev->vk.alloc, dev->bo_list);
fail_bo_list:
tu_gem_close(dev, gem_handle);
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
VkResult
-tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size,
+tu_bo_init_new(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
enum tu_bo_alloc_flags flags)
{
/* TODO: Choose better flags. As of 2018-11-12, freedreno/drm/msm_bo.c
if (ret)
return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- return tu_bo_init(dev, bo, req.handle, size, flags & TU_BO_ALLOC_ALLOW_DUMP);
+ struct tu_bo* bo = tu_device_lookup_bo(dev, req.handle);
+ assert(bo && bo->gem_handle == 0);
+
+ VkResult result =
+ tu_bo_init(dev, bo, req.handle, size, flags & TU_BO_ALLOC_ALLOW_DUMP);
+
+ if (result != VK_SUCCESS)
+ memset(bo, 0, sizeof(*bo));
+ else
+ *out_bo = bo;
+
+ return result;
}
VkResult
tu_bo_init_dmabuf(struct tu_device *dev,
- struct tu_bo *bo,
+ struct tu_bo **out_bo,
uint64_t size,
int prime_fd)
{
if (real_size < 0 || (uint64_t) real_size < size)
return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ /* Importing the same dmabuf several times yields the same gem_handle,
+  * so destroying a BO can race with importing the same dmabuf from
+  * another thread. We must not allow the creation of a dmabuf BO and
+  * its release to happen in parallel.
+  */
+ u_rwlock_wrlock(&dev->dma_bo_lock);
+
uint32_t gem_handle;
int ret = drmPrimeFDToHandle(dev->fd, prime_fd,
&gem_handle);
- if (ret)
+ if (ret) {
+ u_rwlock_wrunlock(&dev->dma_bo_lock);
return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ }
+
+ struct tu_bo* bo = tu_device_lookup_bo(dev, gem_handle);
+
+ if (bo->refcnt != 0) {
+ p_atomic_inc(&bo->refcnt);
+ u_rwlock_wrunlock(&dev->dma_bo_lock);
+
+ *out_bo = bo;
+ return VK_SUCCESS;
+ }
+
- return tu_bo_init(dev, bo, gem_handle, size, false);
+ VkResult result = tu_bo_init(dev, bo, gem_handle, size, false);
+
+ if (result != VK_SUCCESS)
+ memset(bo, 0, sizeof(*bo));
+ else
+ *out_bo = bo;
+
+ u_rwlock_wrunlock(&dev->dma_bo_lock);
+
+ return result;
}
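Why the refcount and the write lock matter: importing a dmabuf that this same device exported returns the same gem_handle, so both references resolve to one sparse-array entry, and the GEM handle must be closed exactly once. A hypothetical self-import flow:

```c
int fd = tu_bo_export_dmabuf(dev, bo_a);        /* bo_a->gem_handle == H */

struct tu_bo *bo_b;
tu_bo_init_dmabuf(dev, &bo_b, bo_a->size, fd);  /* lookup(H): refcnt 1 -> 2 */
assert(bo_b == bo_a);                           /* same entry in dev->bo_map */

tu_bo_finish(dev, bo_b);   /* refcnt 2 -> 1: GEM handle stays open */
tu_bo_finish(dev, bo_a);   /* refcnt 1 -> 0: GEM handle actually closed */
```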
int
{
assert(bo->gem_handle);
+ u_rwlock_rdlock(&dev->dma_bo_lock);
+
+ if (!p_atomic_dec_zero(&bo->refcnt)) {
+ u_rwlock_rdunlock(&dev->dma_bo_lock);
+ return;
+ }
+
if (bo->map)
munmap(bo->map, bo->size);
mtx_lock(&dev->bo_mutex);
- uint32_t idx = dev->bo_idx[bo->gem_handle];
dev->bo_count--;
- dev->bo_list[idx] = dev->bo_list[dev->bo_count];
- dev->bo_idx[dev->bo_list[idx].handle] = idx;
+ dev->bo_list[bo->bo_list_idx] = dev->bo_list[dev->bo_count];
+
+ struct tu_bo* exchanging_bo = tu_device_lookup_bo(dev, dev->bo_list[bo->bo_list_idx].handle);
+ exchanging_bo->bo_list_idx = bo->bo_list_idx;
+
mtx_unlock(&dev->bo_mutex);
- tu_gem_close(dev, bo->gem_handle);
+ /* Our BO structs are stored in a sparse array in the device, so we
+  * don't free the BO pointer; instead we reset the struct to zero to
+  * mark that array entry as free.
+  */
+ uint32_t gem_handle = bo->gem_handle;
+ memset(bo, 0, sizeof(*bo));
+
+ tu_gem_close(dev, gem_handle);
+
+ u_rwlock_rdunlock(&dev->dma_bo_lock);
}
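bo_list stays dense via swap-remove: the last entry fills the vacated slot and the moved BO's bo_list_idx back-pointer is patched, which is what keeps submit_idx lookups (next hunk) valid without the old bo_idx table. The same logic as a hypothetical helper:

```c
/* caller holds dev->bo_mutex */
static void
tu_bo_list_swap_remove(struct tu_device *dev, uint32_t idx)
{
   dev->bo_count--;
   dev->bo_list[idx] = dev->bo_list[dev->bo_count];

   /* patch the back-pointer of whichever BO now occupies slot idx */
   struct tu_bo *moved =
      tu_device_lookup_bo(dev, dev->bo_list[idx].handle);
   moved->bo_list_idx = idx;
}
```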
extern const struct vk_sync_type tu_timeline_sync_type;
struct tu_cs_entry *cs_entry)
{
cmd->type = MSM_SUBMIT_CMD_BUF;
- cmd->submit_idx =
- dev->bo_idx[cs_entry->bo->gem_handle];
+ cmd->submit_idx = cs_entry->bo->bo_list_idx;
cmd->submit_offset = cs_entry->offset;
cmd->size = cs_entry->size;
cmd->pad = 0;
}
VkResult
-tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size,
+tu_bo_init_new(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
enum tu_bo_alloc_flags flags)
{
struct kgsl_gpumem_alloc_id req = {
"GPUMEM_ALLOC_ID failed (%s)", strerror(errno));
}
+ struct tu_bo* bo = tu_device_lookup_bo(dev, req.id);
+ assert(bo && bo->gem_handle == 0);
+
*bo = (struct tu_bo) {
.gem_handle = req.id,
.size = req.mmapsize,
.iova = req.gpuaddr,
};
+ *out_bo = bo;
+
return VK_SUCCESS;
}
VkResult
tu_bo_init_dmabuf(struct tu_device *dev,
- struct tu_bo *bo,
+ struct tu_bo **out_bo,
uint64_t size,
int fd)
{
return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
"Failed to get dma-buf info (%s)\n", strerror(errno));
+ struct tu_bo* bo = tu_device_lookup_bo(dev, req.id);
+ assert(bo && bo->gem_handle == 0);
+
*bo = (struct tu_bo) {
.gem_handle = req.id,
.size = info_req.size,
.iova = info_req.gpuaddr,
};
+ *out_bo = bo;
+
return VK_SUCCESS;
}
.id = bo->gem_handle
};
+ /* Tell sparse array that entry is free */
+ memset(bo, 0, sizeof(*bo));
+
safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req);
}
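Both the msm and kgsl paths now share one convention: a zeroed sparse-array entry is a free slot, and tu_bo_init_new() asserts gem_handle == 0 before reusing one. A hypothetical predicate spelling that out:

```c
static inline bool
tu_bo_slot_is_free(struct tu_device *dev, uint32_t handle)
{
   return tu_device_lookup_bo(dev, handle)->gem_handle == 0;
}
```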
/* Create the shared tess factor BO the first time tess is used on the device. */
mtx_lock(&dev->mutex);
- if (!dev->tess_bo.size)
+ if (!dev->tess_bo)
tu_bo_init_new(dev, &dev->tess_bo, TU_TESS_BO_SIZE, TU_BO_ALLOC_NO_FLAGS);
mtx_unlock(&dev->mutex);
- uint64_t tess_factor_iova = dev->tess_bo.iova;
+ uint64_t tess_factor_iova = dev->tess_bo->iova;
uint64_t tess_param_iova = tess_factor_iova + TU_TESS_FACTOR_SIZE;
uint32_t hs_params[8] = {
if (result != VK_SUCCESS)
return result;
- config->iova = pipeline->pvtmem_bo.iova;
+ config->iova = pipeline->pvtmem_bo->iova;
return result;
}
{
tu_cs_finish(&pipeline->cs);
- if (pipeline->pvtmem_bo.size)
- tu_bo_finish(dev, &pipeline->pvtmem_bo);
+ if (pipeline->pvtmem_bo)
+ tu_bo_finish(dev, pipeline->pvtmem_bo);
ralloc_free(pipeline->executables_mem_ctx);
}
#include "util/list.h"
#include "util/log.h"
#include "util/macros.h"
+#include "util/sparse_array.h"
#include "util/u_atomic.h"
#include "util/u_dynarray.h"
#include "util/xmlconfig.h"
uint64_t size;
uint64_t iova;
void *map;
+
+#ifndef TU_USE_KGSL
+ int32_t refcnt;
+ uint32_t bo_list_idx;
+#endif
};
enum global_shader {
struct bcolor_entry bcolor_builtin[TU_BORDER_COLOR_BUILTIN], bcolor[];
};
#define gb_offset(member) offsetof(struct tu6_global, member)
-#define global_iova(cmd, member) ((cmd)->device->global_bo.iova + gb_offset(member))
+#define global_iova(cmd, member) ((cmd)->device->global_bo->iova + gb_offset(member))
/* extra space in vsc draw/prim streams */
#define VSC_PAD 0x40
* should be impossible to go beyond 48 bits.
*/
struct {
- struct tu_bo bo;
+ struct tu_bo *bo;
mtx_t construct_mtx;
bool initialized;
} scratch_bos[48 - MIN_SCRATCH_BO_SIZE_LOG2];
- struct tu_bo global_bo;
+ struct tu_bo *global_bo;
/* the blob seems to always use 8K factor and 128K param sizes, copy them */
#define TU_TESS_FACTOR_SIZE (8 * 1024)
#define TU_TESS_PARAM_SIZE (128 * 1024)
#define TU_TESS_BO_SIZE (TU_TESS_FACTOR_SIZE + TU_TESS_PARAM_SIZE)
/* Lazily allocated, protected by the device mutex. */
- struct tu_bo tess_bo;
+ struct tu_bo *tess_bo;
struct ir3_shader_variant *global_shaders[GLOBAL_SH_COUNT];
uint64_t global_shader_va[GLOBAL_SH_COUNT];
/* bo list for submits: */
struct drm_msm_gem_submit_bo *bo_list;
- /* map bo handles to bo list index: */
- uint32_t *bo_idx;
- uint32_t bo_count, bo_list_size, bo_idx_size;
+ uint32_t bo_count, bo_list_size;
mtx_t bo_mutex;
+ /* protects creation/freeing of imported BOs */
+ struct u_rwlock dma_bo_lock;
+
+ /* This array holds all our 'struct tu_bo' allocations. We use this
+ * so we can add a refcount to our BOs and check if a particular BO
+ * was already allocated in this device using its GEM handle. This is
+ * necessary to properly manage BO imports, because the kernel doesn't
+ * refcount the underlying BO memory.
+ *
+ * Specifically, when self-importing (i.e. importing a BO into the same
+ * device that created it), the kernel will give us the same BO handle
+ * for both BOs and we must only free it once when both references are
+ * freed. Otherwise, if we are not self-importing, we get two different BO
+ * handles, and we want to free each one individually.
+ *
+ * Every BO in this map carries its reference count in refcnt; only
+ * self-imported BOs will ever have a refcnt > 1.
+ */
+ struct util_sparse_array bo_map;
/* Command streams to set pass index to a scratch reg */
struct tu_cs *perfcntrs_pass_cs;
};
VkResult
-tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size,
+tu_bo_init_new(struct tu_device *dev, struct tu_bo **bo, uint64_t size,
enum tu_bo_alloc_flags flags);
VkResult
tu_bo_init_dmabuf(struct tu_device *dev,
- struct tu_bo *bo,
+ struct tu_bo **bo,
uint64_t size,
int fd);
int
VkResult
tu_bo_map(struct tu_device *dev, struct tu_bo *bo);
+static inline struct tu_bo *
+tu_device_lookup_bo(struct tu_device *device, uint32_t handle)
+{
+ return (struct tu_bo *) util_sparse_array_get(&device->bo_map, handle);
+}
+
/* Get a scratch bo for use inside a command buffer. This will always return
* the same bo given the same size or similar sizes, so only one scratch bo
* can be used at the same time. It's meant for short-lived things where we
{
struct vk_object_base base;
- struct tu_bo bo;
+ struct tu_bo *bo;
};
struct tu_descriptor_range
{
struct vk_object_base base;
- struct tu_bo bo;
+ struct tu_bo *bo;
uint64_t current_offset;
uint64_t size;
struct tu_event
{
struct vk_object_base base;
- struct tu_bo bo;
+ struct tu_bo *bo;
};
struct tu_push_constant_range
struct tu_cs cs;
/* Separate BO for private memory since it should be GPU-writable */
- struct tu_bo pvtmem_bo;
+ struct tu_bo *pvtmem_bo;
struct tu_pipeline_layout *layout;
uint32_t stride;
uint64_t size;
uint32_t pipeline_statistics;
- struct tu_bo bo;
+ struct tu_bo *bo;
/* For performance query */
const struct fd_perfcntr_group *perf_group;
/* Returns the IOVA of a given uint64_t field in a given slot of a query
* pool. */
#define query_iova(type, pool, query, field) \
- pool->bo.iova + pool->stride * (query) + offsetof(type, field)
+ pool->bo->iova + pool->stride * (query) + offsetof(type, field)
#define occlusion_query_iova(pool, query, field) \
query_iova(struct occlusion_query_slot, pool, query, field)
#define pipeline_stat_query_iova(pool, query, field) \
- pool->bo.iova + pool->stride * (query) + \
+ pool->bo->iova + pool->stride * (query) + \
offsetof(struct pipeline_stat_query_slot, field)
#define primitive_query_iova(pool, query, field, i) \
offsetof(struct primitive_slot_value, values[i])
#define perf_query_iova(pool, query, field, i) \
- pool->bo.iova + pool->stride * (query) + \
+ pool->bo->iova + pool->stride * (query) + \
sizeof(struct query_slot) + \
sizeof(struct perfcntr_query_slot) * (i) + \
offsetof(struct perfcntr_query_slot, field)
query_iova(struct query_slot, pool, query, available)
#define query_result_iova(pool, query, type, i) \
- pool->bo.iova + pool->stride * (query) + \
+ pool->bo->iova + pool->stride * (query) + \
sizeof(struct query_slot) + sizeof(type) * (i)
#define query_result_addr(pool, query, type, i) \
- pool->bo.map + pool->stride * (query) + \
+ pool->bo->map + pool->stride * (query) + \
sizeof(struct query_slot) + sizeof(type) * (i)
#define query_is_available(slot) slot->available
*/
static void* slot_address(struct tu_query_pool *pool, uint32_t query)
{
- return (char*)pool->bo.map + query * pool->stride;
+ return (char*)pool->bo->map + query * pool->stride;
}
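The query macros address everything by slot stride plus plain struct offsets inside pool->bo. A worked expansion with hypothetical values (stride == 16, query == 3):

```c
uint64_t avail = query_iova(struct query_slot, pool, 3, available);
/* == pool->bo->iova + 16 * 3 + offsetof(struct query_slot, available);
 * query_result_iova/query_result_addr then index past the slot header
 * (sizeof(struct query_slot)) into the per-query result array. */
```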
static void
return result;
}
- result = tu_bo_map(device, &pool->bo);
+ result = tu_bo_map(device, pool->bo);
if (result != VK_SUCCESS) {
- tu_bo_finish(device, &pool->bo);
+ tu_bo_finish(device, pool->bo);
vk_object_free(&device->vk, pAllocator, pool);
return result;
}
/* Initialize all query statuses to unavailable */
- memset(pool->bo.map, 0, pool->bo.size);
+ memset(pool->bo->map, 0, pool->bo->size);
pool->type = pCreateInfo->queryType;
pool->stride = slot_size;
if (!pool)
return;
- tu_bo_finish(device, &pool->bo);
+ tu_bo_finish(device, pool->bo);
vk_object_free(&device->vk, pAllocator, pool);
}