renderpass_key_equals);
u_rwlock_init(&at->ht_lock);
- at->results_bo = malloc(sizeof(struct tu_bo));
- result = tu_bo_init_new(dev, at->results_bo,
+ result = tu_bo_init_new(dev, &at->results_bo,
sizeof(struct tu_autotune_results),
TU_BO_ALLOC_NO_FLAGS);
if (result != VK_SUCCESS) {
tu_bo_finish(dev, at->results_bo);
fail_bo:
- free(at->results_bo);
u_rwlock_destroy(&at->ht_lock);
_mesa_hash_table_destroy(at->ht, NULL);
_mesa_hash_table_destroy(at->ht, NULL);
u_rwlock_destroy(&at->ht_lock);
tu_bo_finish(dev, at->results_bo);
- free(at->results_bo);
}
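With tu_bo_init_new() now returning a pointer into device-owned storage, callers drop their malloc/free pairs entirely, as the autotune hunk above shows. A minimal sketch of the new calling convention (hypothetical caller; names from this diff):

```c
struct tu_bo *bo;
VkResult result = tu_bo_init_new(dev, &bo, size, TU_BO_ALLOC_NO_FLAGS);
if (result != VK_SUCCESS)
   return result;        /* nothing to free: storage lives in dev->bo_map */

/* ... map with tu_bo_map(dev, bo), then use bo->iova / bo->map ... */

tu_bo_finish(dev, bo);   /* drops the ref; a zeroed entry marks the slot free */
```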
bool
struct ir3_shader_variant *so =
ir3_shader_get_variant(sh, &key, false, false, &created);
- struct tu6_global *global = dev->global_bo.map;
+ struct tu6_global *global = dev->global_bo->map;
assert(*offset + so->info.sizedwords <= ARRAY_SIZE(global->shaders));
dev->global_shaders[idx] = so;
memcpy(&global->shaders[*offset], so->bin,
sizeof(uint32_t) * so->info.sizedwords);
- dev->global_shader_va[idx] = dev->global_bo.iova +
+ dev->global_shader_va[idx] = dev->global_bo->iova +
gb_offset(shaders[*offset]);
*offset += align(so->info.sizedwords, 32);
}
assert(cmd->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
- tu_cs_emit_regs(&cmd->cs, A6XX_PC_TESSFACTOR_ADDR(.qword = cmd->device->tess_bo.iova));
+ tu_cs_emit_regs(&cmd->cs, A6XX_PC_TESSFACTOR_ADDR(.qword = cmd->device->tess_bo->iova));
cmd->state.tessfactor_addr_set = true;
}
tu_disable_draw_states(cmd, cs);
tu_cs_emit_regs(cs,
- A6XX_SP_TP_BORDER_COLOR_BASE_ADDR(.bo = &dev->global_bo,
+ A6XX_SP_TP_BORDER_COLOR_BASE_ADDR(.bo = dev->global_bo,
.bo_offset = gb_offset(bcolor_builtin)));
tu_cs_emit_regs(cs,
- A6XX_SP_PS_TP_BORDER_COLOR_BASE_ADDR(.bo = &dev->global_bo,
+ A6XX_SP_PS_TP_BORDER_COLOR_BASE_ADDR(.bo = dev->global_bo,
.bo_offset = gb_offset(bcolor_builtin)));
/* VSC buffers:
*/
mtx_lock(&dev->mutex);
- struct tu6_global *global = dev->global_bo.map;
+ struct tu6_global *global = dev->global_bo->map;
uint32_t vsc_draw_overflow = global->vsc_draw_overflow;
uint32_t vsc_prim_overflow = global->vsc_prim_overflow;
tu_cs_emit_pkt7(cs, CP_WAIT_REG_MEM, 6);
tu_cs_emit(cs, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) |
CP_WAIT_REG_MEM_0_POLL_MEMORY);
- tu_cs_emit_qw(cs, event->bo.iova); /* POLL_ADDR_LO/HI */
+ tu_cs_emit_qw(cs, event->bo->iova); /* POLL_ADDR_LO/HI */
tu_cs_emit(cs, CP_WAIT_REG_MEM_3_REF(1));
tu_cs_emit(cs, CP_WAIT_REG_MEM_4_MASK(~0u));
tu_cs_emit(cs, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(20));
if (!(stageMask & ~top_of_pipe_flags)) {
tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 3);
- tu_cs_emit_qw(cs, event->bo.iova); /* ADDR_LO/HI */
+ tu_cs_emit_qw(cs, event->bo->iova); /* ADDR_LO/HI */
tu_cs_emit(cs, value);
} else {
/* Use a RB_DONE_TS event to wait for everything to complete. */
tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 4);
tu_cs_emit(cs, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS));
- tu_cs_emit_qw(cs, event->bo.iova);
+ tu_cs_emit_qw(cs, event->bo->iova);
tu_cs_emit(cs, value);
}
}
{
for (uint32_t i = 0; i < cs->bo_count; ++i) {
tu_bo_finish(cs->device, cs->bos[i]);
- free(cs->bos[i]);
}
free(cs->entries);
cs->bos = new_bos;
}
- struct tu_bo *new_bo = malloc(sizeof(struct tu_bo));
- if (!new_bo)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
+ struct tu_bo *new_bo;
VkResult result =
- tu_bo_init_new(cs->device, new_bo, size * sizeof(uint32_t),
+ tu_bo_init_new(cs->device, &new_bo, size * sizeof(uint32_t),
TU_BO_ALLOC_GPU_READ_ONLY | TU_BO_ALLOC_ALLOW_DUMP);
if (result != VK_SUCCESS) {
- free(new_bo);
result = tu_bo_map(cs->device, new_bo);
if (result != VK_SUCCESS) {
tu_bo_finish(cs->device, new_bo);
- free(new_bo);
return result;
}
for (uint32_t i = 0; i + 1 < cs->bo_count; ++i) {
tu_bo_finish(cs->device, cs->bos[i]);
- free(cs->bos[i]);
}
if (cs->bo_count) {
static inline uint8_t *
pool_base(struct tu_descriptor_pool *pool)
{
- return pool->host_bo ?: pool->bo.map;
+ return pool->host_bo ?: pool->bo->map;
}
static uint32_t
* resets via the pool. */
if (pool->current_offset + layout_size <= pool->size) {
set->mapped_ptr = (uint32_t*)(pool_base(pool) + pool->current_offset);
- set->va = pool->host_bo ? 0 : pool->bo.iova + pool->current_offset;
+ set->va = pool->host_bo ? 0 : pool->bo->iova + pool->current_offset;
if (!pool->host_memory_base) {
pool->entries[pool->entry_count].offset = pool->current_offset;
}
set->mapped_ptr = (uint32_t*)(pool_base(pool) + offset);
- set->va = pool->host_bo ? 0 : pool->bo.iova + offset;
+ set->va = pool->host_bo ? 0 : pool->bo->iova + offset;
memmove(&pool->entries[index + 1], &pool->entries[index],
sizeof(pool->entries[0]) * (pool->entry_count - index));
if (ret)
goto fail_alloc;
- ret = tu_bo_map(device, &pool->bo);
+ ret = tu_bo_map(device, pool->bo);
if (ret)
goto fail_map;
} else {
return VK_SUCCESS;
fail_map:
- tu_bo_finish(device, &pool->bo);
+ tu_bo_finish(device, pool->bo);
fail_alloc:
vk_object_free(&device->vk, pAllocator, pool);
return ret;
if (pool->host_bo)
vk_free2(&device->vk.alloc, pAllocator, pool->host_bo);
else
- tu_bo_finish(device, &pool->bo);
+ tu_bo_finish(device, pool->bo);
}
vk_object_free(&device->vk, pAllocator, pool);
struct tu_device *device =
container_of(utctx, struct tu_device, trace_context);
- struct tu_bo *bo = ralloc(NULL, struct tu_bo);
- tu_bo_init_new(device, bo, size, false);
+ struct tu_bo *bo;
+ tu_bo_init_new(device, &bo, size, false);
return bo;
}
struct tu_bo *bo = timestamps;
tu_bo_finish(device, bo);
- ralloc_free(bo);
}
static void
device->fd = physical_device->local_fd;
mtx_init(&device->bo_mutex, mtx_plain);
+ u_rwlock_init(&device->dma_bo_lock);
pthread_mutex_init(&device->submit_mutex, NULL);
#ifndef TU_USE_KGSL
goto fail_queues;
}
+ /* Initialize the sparse array that holds our refcounted BO structs */
+ util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);
+
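The container semantics this patch relies on: util_sparse_array hands out stable, zero-filled entries keyed by GEM handle, so no grow/re-index step is ever needed. A short sketch (gem_handle is a placeholder):

```c
struct tu_bo *bo =
   (struct tu_bo *)util_sparse_array_get(&device->bo_map, gem_handle);
if (bo->gem_handle == 0) {
   /* entry was never initialized (or was finished): the slot is free */
}
```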
/* initial sizes, these will increase if there is overflow */
device->vsc_draw_strm_pitch = 0x1000 + VSC_PAD;
device->vsc_prim_strm_pitch = 0x4000 + VSC_PAD;
goto fail_global_bo;
}
- result = tu_bo_map(device, &device->global_bo);
+ result = tu_bo_map(device, device->global_bo);
if (result != VK_SUCCESS) {
vk_startup_errorf(device->instance, result, "BO map");
goto fail_global_bo_map;
}
- struct tu6_global *global = device->global_bo.map;
+ struct tu6_global *global = device->global_bo->map;
tu_init_clear_blit_shaders(device);
global->predicate = 0;
tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK],
fail_pipeline_cache:
tu_destroy_clear_blit_shaders(device);
fail_global_bo_map:
- tu_bo_finish(device, &device->global_bo);
- vk_free(&device->vk.alloc, device->bo_idx);
+ tu_bo_finish(device, device->global_bo);
vk_free(&device->vk.alloc, device->bo_list);
fail_global_bo:
ir3_compiler_destroy(device->compiler);
+ util_sparse_array_finish(&device->bo_map);
fail_queues:
for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
vk_free(&device->vk.alloc, device->queues[i]);
}
+ u_rwlock_destroy(&device->dma_bo_lock);
vk_device_finish(&device->vk);
vk_free(&device->vk.alloc, device);
return result;
for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) {
if (device->scratch_bos[i].initialized)
- tu_bo_finish(device, &device->scratch_bos[i].bo);
+ tu_bo_finish(device, device->scratch_bos[i].bo);
}
tu_destroy_clear_blit_shaders(device);
tu_autotune_fini(&device->autotune, device);
+ util_sparse_array_finish(&device->bo_map);
+ u_rwlock_destroy(&device->dma_bo_lock);
+
pthread_cond_destroy(&device->timeline_cond);
vk_free(&device->vk.alloc, device->bo_list);
- vk_free(&device->vk.alloc, device->bo_idx);
vk_device_finish(&device->vk);
vk_free(&device->vk.alloc, device);
}
for (unsigned i = index; i < ARRAY_SIZE(dev->scratch_bos); i++) {
if (p_atomic_read(&dev->scratch_bos[i].initialized)) {
/* Fast path: just return the already-allocated BO. */
- *bo = &dev->scratch_bos[i].bo;
+ *bo = dev->scratch_bos[i].bo;
return VK_SUCCESS;
}
}
*/
if (dev->scratch_bos[index].initialized) {
mtx_unlock(&dev->scratch_bos[index].construct_mtx);
- *bo = &dev->scratch_bos[index].bo;
+ *bo = dev->scratch_bos[index].bo;
return VK_SUCCESS;
}
mtx_unlock(&dev->scratch_bos[index].construct_mtx);
- *bo = &dev->scratch_bos[index].bo;
+ *bo = dev->scratch_bos[index].bo;
return VK_SUCCESS;
}
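These scratch-BO hunks only swap the embedded struct for the sparse-array pointer; the surrounding double-checked locking is unchanged. For context, a sketch of the slow path under construct_mtx (fields from this diff; the size computation is illustrative):

```c
mtx_lock(&dev->scratch_bos[index].construct_mtx);
if (!dev->scratch_bos[index].initialized) {
   VkResult result =
      tu_bo_init_new(dev, &dev->scratch_bos[index].bo,
                     1ull << (index + MIN_SCRATCH_BO_SIZE_LOG2), /* illustrative */
                     TU_BO_ALLOC_NO_FLAGS);
   if (result == VK_SUCCESS)
      p_atomic_set(&dev->scratch_bos[index].initialized, true);
}
mtx_unlock(&dev->scratch_bos[index].construct_mtx);
*bo = dev->scratch_bos[index].bo;
```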
if (result == VK_SUCCESS) {
- mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo.size);
+ mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
if (mem_heap_used > mem_heap->size) {
- p_atomic_add(&mem_heap->used, -mem->bo.size);
- tu_bo_finish(device, &mem->bo);
+ p_atomic_add(&mem_heap->used, -mem->bo->size);
+ tu_bo_finish(device, mem->bo);
result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
"Out of heap memory");
}
if (mem == NULL)
return;
- p_atomic_add(&device->physical_device->heap.used, -mem->bo.size);
- tu_bo_finish(device, &mem->bo);
+ p_atomic_add(&device->physical_device->heap.used, -mem->bo->size);
+ tu_bo_finish(device, mem->bo);
vk_object_free(&device->vk, pAllocator, mem);
}
return VK_SUCCESS;
}
- if (!mem->bo.map) {
- result = tu_bo_map(device, &mem->bo);
+ if (!mem->bo->map) {
+ result = tu_bo_map(device, mem->bo);
if (result != VK_SUCCESS)
return result;
}
- *ppData = mem->bo.map + offset;
+ *ppData = mem->bo->map + offset;
return VK_SUCCESS;
}
TU_FROM_HANDLE(tu_buffer, buffer, pBindInfos[i].buffer);
if (mem) {
- buffer->bo = &mem->bo;
- buffer->iova = mem->bo.iova + pBindInfos[i].memoryOffset;
+ buffer->bo = mem->bo;
+ buffer->iova = mem->bo->iova + pBindInfos[i].memoryOffset;
} else {
buffer->bo = NULL;
}
TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory);
if (mem) {
- image->bo = &mem->bo;
- image->iova = mem->bo.iova + pBindInfos[i].memoryOffset;
+ image->bo = mem->bo;
+ image->iova = mem->bo->iova + pBindInfos[i].memoryOffset;
} else {
image->bo = NULL;
image->iova = 0;
if (result != VK_SUCCESS)
goto fail_alloc;
- result = tu_bo_map(device, &event->bo);
+ result = tu_bo_map(device, event->bo);
if (result != VK_SUCCESS)
goto fail_map;
return VK_SUCCESS;
fail_map:
- tu_bo_finish(device, &event->bo);
+ tu_bo_finish(device, event->bo);
fail_alloc:
vk_object_free(&device->vk, pAllocator, event);
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (!event)
return;
- tu_bo_finish(device, &event->bo);
+ tu_bo_finish(device, event->bo);
vk_object_free(&device->vk, pAllocator, event);
}
{
TU_FROM_HANDLE(tu_event, event, _event);
- if (*(uint64_t*) event->bo.map == 1)
+ if (*(uint64_t*) event->bo->map == 1)
return VK_EVENT_SET;
return VK_EVENT_RESET;
}
tu_SetEvent(VkDevice _device, VkEvent _event)
{
TU_FROM_HANDLE(tu_event, event, _event);
- *(uint64_t*) event->bo.map = 1;
+ *(uint64_t*) event->bo->map = 1;
return VK_SUCCESS;
}
tu_ResetEvent(VkDevice _device, VkEvent _event)
{
TU_FROM_HANDLE(tu_event, event, _event);
- *(uint64_t*) event->bo.map = 0;
+ *(uint64_t*) event->bo->map = 0;
return VK_SUCCESS;
}
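An event is a single uint64_t slot in its own BO: the host pokes it through the CPU mapping (above), while the GPU writes the same address with CP_MEM_WRITE or an RB_DONE_TS timestamp (earlier hunk). A hypothetical polling helper to make the contract explicit:

```c
static inline bool
tu_event_is_set(const struct tu_event *event)
{
   /* matches tu_SetEvent()'s write of 1 / tu_ResetEvent()'s write of 0 */
   return *(volatile uint64_t *)event->bo->map == 1;
}
```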
border_color = BITSET_FFS(device->custom_border_color);
BITSET_CLEAR(device->custom_border_color, border_color);
mtx_unlock(&device->mutex);
- tu6_pack_border_color(device->global_bo.map + gb_offset(bcolor[border_color]),
+ tu6_pack_border_color(device->global_bo->map + gb_offset(bcolor[border_color]),
&custom_border_color->customBorderColor,
pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT);
border_color += TU_BORDER_COLOR_BUILTIN;
pGetFdInfo->handleType ==
VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
- int prime_fd = tu_bo_export_dmabuf(device, &memory->bo);
+ int prime_fd = tu_bo_export_dmabuf(device, memory->bo);
if (prime_fd < 0)
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
}
- *bo = (struct tu_bo) {
- .gem_handle = gem_handle,
- .size = size,
- .iova = iova,
- };
-
mtx_lock(&dev->bo_mutex);
uint32_t idx = dev->bo_count++;
dev->bo_list_size = new_len;
}
- /* grow the "bo idx" list (maps gem handles to index in the bo list) */
- if (bo->gem_handle >= dev->bo_idx_size) {
- uint32_t new_len = bo->gem_handle + 256;
- uint32_t *new_ptr =
- vk_realloc(&dev->vk.alloc, dev->bo_idx, new_len * sizeof(*dev->bo_idx),
- 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
- if (!new_ptr)
- goto fail_bo_idx;
-
- dev->bo_idx = new_ptr;
- dev->bo_idx_size = new_len;
- }
-
- dev->bo_idx[bo->gem_handle] = idx;
dev->bo_list[idx] = (struct drm_msm_gem_submit_bo) {
.flags = MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE |
COND(dump, MSM_SUBMIT_BO_DUMP),
.handle = gem_handle,
.presumed = iova,
};
+
+ *bo = (struct tu_bo) {
+ .gem_handle = gem_handle,
+ .size = size,
+ .iova = iova,
+ .refcnt = 1,
+ .bo_list_idx = idx,
+ };
+
mtx_unlock(&dev->bo_mutex);
return VK_SUCCESS;
-fail_bo_idx:
- vk_free(&dev->vk.alloc, dev->bo_list);
fail_bo_list:
tu_gem_close(dev, gem_handle);
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
VkResult
-tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size,
+tu_bo_init_new(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
enum tu_bo_alloc_flags flags)
{
/* TODO: Choose better flags. As of 2018-11-12, freedreno/drm/msm_bo.c
if (ret)
return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- return tu_bo_init(dev, bo, req.handle, size, flags & TU_BO_ALLOC_ALLOW_DUMP);
+ struct tu_bo* bo = tu_device_lookup_bo(dev, req.handle);
+ assert(bo && bo->gem_handle == 0);
+
+ VkResult result =
+ tu_bo_init(dev, bo, req.handle, size, flags & TU_BO_ALLOC_ALLOW_DUMP);
+
+ if (result != VK_SUCCESS)
+ memset(bo, 0, sizeof(*bo));
+ else
+ *out_bo = bo;
+
+ return result;
}
VkResult
tu_bo_init_dmabuf(struct tu_device *dev,
- struct tu_bo *bo,
+ struct tu_bo **out_bo,
uint64_t size,
int prime_fd)
{
if (real_size < 0 || (uint64_t) real_size < size)
return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ /* Importing the same dmabuf several times yields the same gem_handle,
+  * so destroying a BO can race with importing the same dmabuf from
+  * another thread. We must not allow the creation of a dmabuf BO and
+  * its release to happen in parallel.
+  */
+ u_rwlock_wrlock(&dev->dma_bo_lock);
+
uint32_t gem_handle;
int ret = drmPrimeFDToHandle(dev->fd, prime_fd,
&gem_handle);
- if (ret)
+ if (ret) {
+ u_rwlock_wrunlock(&dev->dma_bo_lock);
return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ }
+
+ struct tu_bo* bo = tu_device_lookup_bo(dev, gem_handle);
+
+ if (bo->refcnt != 0) {
+ p_atomic_inc(&bo->refcnt);
+ u_rwlock_wrunlock(&dev->dma_bo_lock);
+
+ *out_bo = bo;
+ return VK_SUCCESS;
+ }
+
- return tu_bo_init(dev, bo, gem_handle, size, false);
+ VkResult result = tu_bo_init(dev, bo, gem_handle, size, false);
+
+ if (result != VK_SUCCESS)
+ memset(bo, 0, sizeof(*bo));
+ else
+ *out_bo = bo;
+
+ u_rwlock_wrunlock(&dev->dma_bo_lock);
+
+ return result;
}
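Why the refcount and the write lock matter: importing a dmabuf that this same device exported returns the same gem_handle, so both references resolve to one sparse-array entry, and the GEM handle must be closed exactly once. A hypothetical self-import flow:

```c
int fd = tu_bo_export_dmabuf(dev, bo_a);        /* bo_a->gem_handle == H */

struct tu_bo *bo_b;
tu_bo_init_dmabuf(dev, &bo_b, bo_a->size, fd);  /* lookup(H): refcnt 1 -> 2 */
assert(bo_b == bo_a);                           /* same entry in dev->bo_map */

tu_bo_finish(dev, bo_b);   /* refcnt 2 -> 1: GEM handle stays open */
tu_bo_finish(dev, bo_a);   /* refcnt 1 -> 0: GEM handle actually closed */
```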
int
{
assert(bo->gem_handle);
+ u_rwlock_rdlock(&dev->dma_bo_lock);
+
+ if (!p_atomic_dec_zero(&bo->refcnt)) {
+ u_rwlock_rdunlock(&dev->dma_bo_lock);
+ return;
+ }
+
if (bo->map)
munmap(bo->map, bo->size);
mtx_lock(&dev->bo_mutex);
- uint32_t idx = dev->bo_idx[bo->gem_handle];
dev->bo_count--;
- dev->bo_list[idx] = dev->bo_list[dev->bo_count];
- dev->bo_idx[dev->bo_list[idx].handle] = idx;
+ dev->bo_list[bo->bo_list_idx] = dev->bo_list[dev->bo_count];
+
+ struct tu_bo* exchanging_bo = tu_device_lookup_bo(dev, dev->bo_list[bo->bo_list_idx].handle);
+ exchanging_bo->bo_list_idx = bo->bo_list_idx;
+
mtx_unlock(&dev->bo_mutex);
- tu_gem_close(dev, bo->gem_handle);
+ /* Our BO structs are stored in a sparse array in the device, so we
+  * don't free the BO pointer; instead we reset the struct to zero to
+  * mark that array entry as free.
+  */
+ uint32_t gem_handle = bo->gem_handle;
+ memset(bo, 0, sizeof(*bo));
+
+ tu_gem_close(dev, gem_handle);
+
+ u_rwlock_rdunlock(&dev->dma_bo_lock);
}
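bo_list stays dense via swap-remove: the last entry fills the vacated slot and the moved BO's bo_list_idx back-pointer is patched, which is what keeps submit_idx lookups (next hunk) valid without the old bo_idx table. The same logic as a hypothetical helper:

```c
/* caller holds dev->bo_mutex */
static void
tu_bo_list_swap_remove(struct tu_device *dev, uint32_t idx)
{
   dev->bo_count--;
   dev->bo_list[idx] = dev->bo_list[dev->bo_count];

   /* patch the back-pointer of whichever BO now occupies slot idx */
   struct tu_bo *moved =
      tu_device_lookup_bo(dev, dev->bo_list[idx].handle);
   moved->bo_list_idx = idx;
}
```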
extern const struct vk_sync_type tu_timeline_sync_type;
struct tu_cs_entry *cs_entry)
{
cmd->type = MSM_SUBMIT_CMD_BUF;
- cmd->submit_idx =
- dev->bo_idx[cs_entry->bo->gem_handle];
+ cmd->submit_idx = cs_entry->bo->bo_list_idx;
cmd->submit_offset = cs_entry->offset;
cmd->size = cs_entry->size;
cmd->pad = 0;
}
VkResult
-tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size,
+tu_bo_init_new(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
enum tu_bo_alloc_flags flags)
{
struct kgsl_gpumem_alloc_id req = {
"GPUMEM_ALLOC_ID failed (%s)", strerror(errno));
}
+ struct tu_bo* bo = tu_device_lookup_bo(dev, req.id);
+ assert(bo && bo->gem_handle == 0);
+
*bo = (struct tu_bo) {
.gem_handle = req.id,
.size = req.mmapsize,
.iova = req.gpuaddr,
};
+ *out_bo = bo;
+
return VK_SUCCESS;
}
VkResult
tu_bo_init_dmabuf(struct tu_device *dev,
- struct tu_bo *bo,
+ struct tu_bo **out_bo,
uint64_t size,
int fd)
{
return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
"Failed to get dma-buf info (%s)\n", strerror(errno));
+ struct tu_bo* bo = tu_device_lookup_bo(dev, req.id);
+ assert(bo && bo->gem_handle == 0);
+
*bo = (struct tu_bo) {
.gem_handle = req.id,
.size = info_req.size,
.iova = info_req.gpuaddr,
};
+ *out_bo = bo;
+
return VK_SUCCESS;
}
.id = bo->gem_handle
};
+ /* Tell sparse array that entry is free */
+ memset(bo, 0, sizeof(*bo));
+
safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req);
}
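Both the msm and kgsl paths now share one convention: a zeroed sparse-array entry is a free slot, and tu_bo_init_new() asserts gem_handle == 0 before reusing one. A hypothetical predicate spelling that out:

```c
static inline bool
tu_bo_slot_is_free(struct tu_device *dev, uint32_t handle)
{
   return tu_device_lookup_bo(dev, handle)->gem_handle == 0;
}
```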
/* Create the shared tess factor BO the first time tess is used on the device. */
mtx_lock(&dev->mutex);
- if (!dev->tess_bo.size)
+ if (!dev->tess_bo)
tu_bo_init_new(dev, &dev->tess_bo, TU_TESS_BO_SIZE, TU_BO_ALLOC_NO_FLAGS);
mtx_unlock(&dev->mutex);
- uint64_t tess_factor_iova = dev->tess_bo.iova;
+ uint64_t tess_factor_iova = dev->tess_bo->iova;
uint64_t tess_param_iova = tess_factor_iova + TU_TESS_FACTOR_SIZE;
uint32_t hs_params[8] = {
if (result != VK_SUCCESS)
return result;
- config->iova = pipeline->pvtmem_bo.iova;
+ config->iova = pipeline->pvtmem_bo->iova;
return result;
}
{
tu_cs_finish(&pipeline->cs);
- if (pipeline->pvtmem_bo.size)
- tu_bo_finish(dev, &pipeline->pvtmem_bo);
+ if (pipeline->pvtmem_bo)
+ tu_bo_finish(dev, pipeline->pvtmem_bo);
ralloc_free(pipeline->executables_mem_ctx);
}
#include "util/list.h"
#include "util/log.h"
#include "util/macros.h"
+#include "util/sparse_array.h"
#include "util/u_atomic.h"
#include "util/u_dynarray.h"
#include "util/xmlconfig.h"
uint64_t size;
uint64_t iova;
void *map;
+
+#ifndef TU_USE_KGSL
+ int32_t refcnt;
+ uint32_t bo_list_idx;
+#endif
};
enum global_shader {
struct bcolor_entry bcolor_builtin[TU_BORDER_COLOR_BUILTIN], bcolor[];
};
#define gb_offset(member) offsetof(struct tu6_global, member)
-#define global_iova(cmd, member) ((cmd)->device->global_bo.iova + gb_offset(member))
+#define global_iova(cmd, member) ((cmd)->device->global_bo->iova + gb_offset(member))
/* extra space in vsc draw/prim streams */
#define VSC_PAD 0x40
* should be impossible to go beyond 48 bits.
*/
struct {
- struct tu_bo bo;
+ struct tu_bo *bo;
mtx_t construct_mtx;
bool initialized;
} scratch_bos[48 - MIN_SCRATCH_BO_SIZE_LOG2];
- struct tu_bo global_bo;
+ struct tu_bo *global_bo;
/* the blob seems to always use 8K factor and 128K param sizes, copy them */
#define TU_TESS_FACTOR_SIZE (8 * 1024)
#define TU_TESS_PARAM_SIZE (128 * 1024)
#define TU_TESS_BO_SIZE (TU_TESS_FACTOR_SIZE + TU_TESS_PARAM_SIZE)
/* Lazily allocated, protected by the device mutex. */
- struct tu_bo tess_bo;
+ struct tu_bo *tess_bo;
struct ir3_shader_variant *global_shaders[GLOBAL_SH_COUNT];
uint64_t global_shader_va[GLOBAL_SH_COUNT];
/* bo list for submits: */
struct drm_msm_gem_submit_bo *bo_list;
- /* map bo handles to bo list index: */
- uint32_t *bo_idx;
- uint32_t bo_count, bo_list_size, bo_idx_size;
+ uint32_t bo_count, bo_list_size;
mtx_t bo_mutex;
+ /* protects creation/freeing of imported BOs */
+ struct u_rwlock dma_bo_lock;
+
+ /* This array holds all our 'struct tu_bo' allocations. We use this
+ * so we can add a refcount to our BOs and check if a particular BO
+ * was already allocated in this device using its GEM handle. This is
+ * necessary to properly manage BO imports, because the kernel doesn't
+ * refcount the underlying BO memory.
+ *
+ * Specifically, when self-importing (i.e. importing a BO into the same
+ * device that created it), the kernel will give us the same BO handle
+ * for both BOs and we must only free it once when both references are
+ * freed. Otherwise, if we are not self-importing, we get two different BO
+ * handles, and we want to free each one individually.
+ *
+ * Every BO in this map carries its reference count in refcnt; only
+ * self-imported BOs will ever have a refcnt > 1.
+ */
+ struct util_sparse_array bo_map;
/* Command streams to set pass index to a scratch reg */
struct tu_cs *perfcntrs_pass_cs;
};
VkResult
-tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size,
+tu_bo_init_new(struct tu_device *dev, struct tu_bo **bo, uint64_t size,
enum tu_bo_alloc_flags flags);
VkResult
tu_bo_init_dmabuf(struct tu_device *dev,
- struct tu_bo *bo,
+ struct tu_bo **bo,
uint64_t size,
int fd);
int
VkResult
tu_bo_map(struct tu_device *dev, struct tu_bo *bo);
+static inline struct tu_bo *
+tu_device_lookup_bo(struct tu_device *device, uint32_t handle)
+{
+ return (struct tu_bo *) util_sparse_array_get(&device->bo_map, handle);
+}
+
/* Get a scratch bo for use inside a command buffer. This will always return
* the same bo given the same size or similar sizes, so only one scratch bo
* can be used at the same time. It's meant for short-lived things where we
{
struct vk_object_base base;
- struct tu_bo bo;
+ struct tu_bo *bo;
};
struct tu_descriptor_range
{
struct vk_object_base base;
- struct tu_bo bo;
+ struct tu_bo *bo;
uint64_t current_offset;
uint64_t size;
struct tu_event
{
struct vk_object_base base;
- struct tu_bo bo;
+ struct tu_bo *bo;
};
struct tu_push_constant_range
struct tu_cs cs;
/* Separate BO for private memory since it should be GPU-writable */
- struct tu_bo pvtmem_bo;
+ struct tu_bo *pvtmem_bo;
struct tu_pipeline_layout *layout;
uint32_t stride;
uint64_t size;
uint32_t pipeline_statistics;
- struct tu_bo bo;
+ struct tu_bo *bo;
/* For performance query */
const struct fd_perfcntr_group *perf_group;
/* Returns the IOVA of a given uint64_t field in a given slot of a query
* pool. */
#define query_iova(type, pool, query, field) \
- pool->bo.iova + pool->stride * (query) + offsetof(type, field)
+ pool->bo->iova + pool->stride * (query) + offsetof(type, field)
#define occlusion_query_iova(pool, query, field) \
query_iova(struct occlusion_query_slot, pool, query, field)
#define pipeline_stat_query_iova(pool, query, field) \
- pool->bo.iova + pool->stride * (query) + \
+ pool->bo->iova + pool->stride * (query) + \
offsetof(struct pipeline_stat_query_slot, field)
#define primitive_query_iova(pool, query, field, i) \
offsetof(struct primitive_slot_value, values[i])
#define perf_query_iova(pool, query, field, i) \
- pool->bo.iova + pool->stride * (query) + \
+ pool->bo->iova + pool->stride * (query) + \
sizeof(struct query_slot) + \
sizeof(struct perfcntr_query_slot) * (i) + \
offsetof(struct perfcntr_query_slot, field)
query_iova(struct query_slot, pool, query, available)
#define query_result_iova(pool, query, type, i) \
- pool->bo.iova + pool->stride * (query) + \
+ pool->bo->iova + pool->stride * (query) + \
sizeof(struct query_slot) + sizeof(type) * (i)
#define query_result_addr(pool, query, type, i) \
- pool->bo.map + pool->stride * (query) + \
+ pool->bo->map + pool->stride * (query) + \
sizeof(struct query_slot) + sizeof(type) * (i)
#define query_is_available(slot) slot->available
*/
static void* slot_address(struct tu_query_pool *pool, uint32_t query)
{
- return (char*)pool->bo.map + query * pool->stride;
+ return (char*)pool->bo->map + query * pool->stride;
}
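The query macros address everything by slot stride plus plain struct offsets inside pool->bo. A worked expansion with hypothetical values (stride == 16, query == 3):

```c
uint64_t avail = query_iova(struct query_slot, pool, 3, available);
/* == pool->bo->iova + 16 * 3 + offsetof(struct query_slot, available);
 * query_result_iova/query_result_addr then index past the slot header
 * (sizeof(struct query_slot)) into the per-query result array. */
```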
static void
return result;
}
- result = tu_bo_map(device, &pool->bo);
+ result = tu_bo_map(device, pool->bo);
if (result != VK_SUCCESS) {
- tu_bo_finish(device, &pool->bo);
+ tu_bo_finish(device, pool->bo);
vk_object_free(&device->vk, pAllocator, pool);
return result;
}
/* Initialize all query statuses to unavailable */
- memset(pool->bo.map, 0, pool->bo.size);
+ memset(pool->bo->map, 0, pool->bo->size);
pool->type = pCreateInfo->queryType;
pool->stride = slot_size;
if (!pool)
return;
- tu_bo_finish(device, &pool->bo);
+ tu_bo_finish(device, pool->bo);
vk_object_free(&device->vk, pAllocator, pool);
}