From a278f7d21eb046f4053194870a48f330e0569f65 Mon Sep 17 00:00:00 2001 From: Karmjit Mahil Date: Mon, 13 Jun 2022 14:42:47 +0100 Subject: [PATCH] pvr: Add static consts support in descriptor pds program. Signed-off-by: Karmjit Mahil Reviewed-by: Rajnesh Kanwal Part-of: --- src/imagination/rogue/rogue_build_data.h | 17 ++++ src/imagination/vulkan/pvr_cmd_buffer.c | 23 +++++ src/imagination/vulkan/pvr_pipeline.c | 154 ++++++++++++++++++++----------- src/imagination/vulkan/pvr_private.h | 3 + 4 files changed, 144 insertions(+), 53 deletions(-) diff --git a/src/imagination/rogue/rogue_build_data.h b/src/imagination/rogue/rogue_build_data.h index b1d178a..4d5dc58 100644 --- a/src/imagination/rogue/rogue_build_data.h +++ b/src/imagination/rogue/rogue_build_data.h @@ -61,6 +61,22 @@ struct rogue_ubo_data { }; /** + * \brief Compile time constants that need uploading. + */ +struct rogue_compile_time_consts_data { + /* TODO: Output these from the compiler. */ + /* TODO: Add the other types. */ + struct { + size_t num; + size_t dest; + /* TODO: This should probably be bigger. Big enough to account for all + * available writable special constant regs. + */ + uint32_t value[ROGUE_MAX_BUFFERS]; + } static_consts; +}; + +/** * \brief Per-stage common build data. */ struct rogue_common_build_data { @@ -70,6 +86,7 @@ struct rogue_common_build_data { size_t shareds; struct rogue_ubo_data ubo_data; + struct rogue_compile_time_consts_data compile_time_consts_data; }; /** diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c index d3221d2..d4d9c14 100644 --- a/src/imagination/vulkan/pvr_cmd_buffer.c +++ b/src/imagination/vulkan/pvr_cmd_buffer.c @@ -2826,6 +2826,29 @@ static VkResult pvr_setup_descriptor_mappings( break; } + case PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER: { + const struct pvr_const_map_entry_special_buffer *special_buff_entry = + (struct pvr_const_map_entry_special_buffer *)entries; + + switch (special_buff_entry->buffer_type) { + case PVR_BUFFER_TYPES_COMPILE_TIME: { + uint64_t addr = descriptor_state->static_consts->vma->dev_addr.addr; + + PVR_WRITE(qword_buffer, + addr, + special_buff_entry->const_offset, + pds_info->data_size_in_dwords); + break; + } + + default: + unreachable("Unsupported special buffer type."); + } + + entries += sizeof(*special_buff_entry); + break; + } + default: unreachable("Unsupported map entry type."); } diff --git a/src/imagination/vulkan/pvr_pipeline.c b/src/imagination/vulkan/pvr_pipeline.c index d68dd7b..0e77488 100644 --- a/src/imagination/vulkan/pvr_pipeline.c +++ b/src/imagination/vulkan/pvr_pipeline.c @@ -561,11 +561,22 @@ static size_t pvr_pds_get_max_descriptor_upload_const_map_size_in_bytes() typedef struct pvr_pds_buffer ( *const pvr_pds_descriptor_program_buffer_array_ptr)[PVR_PDS_MAX_BUFFERS]; -static void pvr_pds_descriptor_program_setup_buffers( +/** + * \brief Setup buffers for the PDS descriptor program. + * + * Sets up buffers required by the PDS gen api based on compiler info. + * + * For compile time static constants that need DMAing it uploads them and + * returns the upload in \r static_consts_pvr_bo_out . + */ +static VkResult pvr_pds_descriptor_program_setup_buffers( + struct pvr_device *device, bool robust_buffer_access, + const struct rogue_compile_time_consts_data *compile_time_consts_data, const struct rogue_ubo_data *ubo_data, pvr_pds_descriptor_program_buffer_array_ptr buffers_out_ptr, - uint32_t *const buffer_count_out) + uint32_t *const buffer_count_out, + struct pvr_bo **const static_consts_pvr_bo_out) { struct pvr_pds_buffer *const buffers = *buffers_out_ptr; uint32_t buffer_count = 0; @@ -591,21 +602,56 @@ static void pvr_pds_descriptor_program_setup_buffers( buffer_count++; } + if (compile_time_consts_data->static_consts.num > 0) { + VkResult result; + + assert(compile_time_consts_data->static_consts.num <= + ARRAY_SIZE(compile_time_consts_data->static_consts.value)); + + /* This is fine since buffers_out_ptr is a pointer to an array. */ + assert(buffer_count < ARRAY_SIZE(*buffers_out_ptr)); + + /* TODO: Is it possible to have multiple static consts buffer where the + * destination is not adjoining? If so we need to handle that. + * Currently we're only setting up a single buffer. + */ + buffers[buffer_count++] = (struct pvr_pds_buffer){ + .type = PVR_BUFFER_TYPES_COMPILE_TIME, + .size_in_dwords = compile_time_consts_data->static_consts.num, + .destination = compile_time_consts_data->static_consts.dest, + }; + + result = pvr_gpu_upload(device, + device->heaps.general_heap, + compile_time_consts_data->static_consts.value, + compile_time_consts_data->static_consts.num * + ROGUE_REG_SIZE_BYTES, + ROGUE_REG_SIZE_BYTES, + static_consts_pvr_bo_out); + if (result != VK_SUCCESS) + return result; + } else { + *static_consts_pvr_bo_out = NULL; + } + *buffer_count_out = buffer_count; + + return VK_SUCCESS; } static VkResult pvr_pds_descriptor_program_create_and_upload( struct pvr_device *const device, const VkAllocationCallbacks *const allocator, + const struct rogue_compile_time_consts_data *const compile_time_consts_data, const struct rogue_ubo_data *const ubo_data, const struct pvr_explicit_constant_usage *const explicit_const_usage, const struct pvr_pipeline_layout *const layout, enum pvr_stage_allocation stage, - struct pvr_pds_upload *const pds_code_upload_out, - struct pvr_pds_info *const pds_info_out) + struct pvr_stage_allocation_descriptor_state *const descriptor_state) { const size_t const_entries_size_in_bytes = pvr_pds_get_max_descriptor_upload_const_map_size_in_bytes(); + struct pvr_pds_info *const pds_info = &descriptor_state->pds_info; struct pvr_descriptor_program_input program = { 0 }; struct pvr_const_map_entry *entries_buffer; ASSERTED uint32_t code_size_in_dwords; @@ -615,19 +661,18 @@ static VkResult pvr_pds_descriptor_program_create_and_upload( assert(stage != PVR_STAGE_ALLOCATION_COUNT); - memset(pds_info_out, 0, sizeof(*pds_info_out)); - - pvr_pds_descriptor_program_setup_buffers(device->features.robustBufferAccess, - ubo_data, - &program.buffers, - &program.buffer_count); + *pds_info = (struct pvr_pds_info){ 0 }; - for (uint32_t dma = 0; dma < program.buffer_count; dma++) { - if (program.buffers[dma].type != PVR_BUFFER_TYPES_COMPILE_TIME) - continue; - - assert(!"Unimplemented"); - } + result = pvr_pds_descriptor_program_setup_buffers( + device, + device->features.robustBufferAccess, + compile_time_consts_data, + ubo_data, + &program.buffers, + &program.buffer_count, + &descriptor_state->static_consts); + if (result != VK_SUCCESS) + return result; if (layout->per_stage_reg_info[stage].primary_dynamic_size_in_dwords) assert(!"Unimplemented"); @@ -667,23 +712,26 @@ static VkResult pvr_pds_descriptor_program_create_and_upload( const_entries_size_in_bytes, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!entries_buffer) + if (!entries_buffer) { + pvr_bo_free(device, descriptor_state->static_consts); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } - pds_info_out->entries = entries_buffer; - pds_info_out->entries_size_in_bytes = const_entries_size_in_bytes; + pds_info->entries = entries_buffer; + pds_info->entries_size_in_bytes = const_entries_size_in_bytes; - pvr_pds_generate_descriptor_upload_program(&program, NULL, pds_info_out); + pvr_pds_generate_descriptor_upload_program(&program, NULL, pds_info); - code_size_in_dwords = pds_info_out->code_size_in_dwords; + code_size_in_dwords = pds_info->code_size_in_dwords; staging_buffer_size = - pds_info_out->code_size_in_dwords * sizeof(*staging_buffer); + pds_info->code_size_in_dwords * sizeof(*staging_buffer); if (!staging_buffer_size) { vk_free2(&device->vk.alloc, allocator, entries_buffer); - memset(pds_info_out, 0, sizeof(*pds_info_out)); - memset(pds_code_upload_out, 0, sizeof(*pds_code_upload_out)); + *descriptor_state = (struct pvr_stage_allocation_descriptor_state){ 0 }; + return VK_SUCCESS; } @@ -693,30 +741,33 @@ static VkResult pvr_pds_descriptor_program_create_and_upload( 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); if (!staging_buffer) { + pvr_bo_free(device, descriptor_state->static_consts); vk_free2(&device->vk.alloc, allocator, entries_buffer); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); } pvr_pds_generate_descriptor_upload_program(&program, staging_buffer, - pds_info_out); + pds_info); - assert(pds_info_out->code_size_in_dwords <= code_size_in_dwords); + assert(pds_info->code_size_in_dwords <= code_size_in_dwords); /* FIXME: use vk_realloc2() ? */ entries_buffer = vk_realloc((!allocator) ? &device->vk.alloc : allocator, entries_buffer, - pds_info_out->entries_written_size_in_bytes, + pds_info->entries_written_size_in_bytes, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!entries_buffer) { + pvr_bo_free(device, descriptor_state->static_consts); vk_free2(&device->vk.alloc, allocator, staging_buffer); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); } - pds_info_out->entries = entries_buffer; - pds_info_out->entries_size_in_bytes = - pds_info_out->entries_written_size_in_bytes; + pds_info->entries = entries_buffer; + pds_info->entries_size_in_bytes = pds_info->entries_written_size_in_bytes; /* FIXME: Figure out the define for alignment of 16. */ result = pvr_gpu_upload_pds(device, @@ -724,11 +775,12 @@ static VkResult pvr_pds_descriptor_program_create_and_upload( 0, 0, staging_buffer, - pds_info_out->code_size_in_dwords, + pds_info->code_size_in_dwords, 16, 16, - pds_code_upload_out); + &descriptor_state->pds_code); if (result != VK_SUCCESS) { + pvr_bo_free(device, descriptor_state->static_consts); vk_free2(&device->vk.alloc, allocator, entries_buffer); vk_free2(&device->vk.alloc, allocator, staging_buffer); @@ -743,11 +795,11 @@ static VkResult pvr_pds_descriptor_program_create_and_upload( static void pvr_pds_descriptor_program_destroy( struct pvr_device *const device, const struct VkAllocationCallbacks *const allocator, - struct pvr_pds_upload *const pds_code, - struct pvr_pds_info *const pds_info) + struct pvr_stage_allocation_descriptor_state *const descriptor_state) { - pvr_bo_free(device, pds_code->pvr_bo); - vk_free2(&device->vk.alloc, allocator, pds_info->entries); + pvr_bo_free(device, descriptor_state->pds_code.pvr_bo); + vk_free2(&device->vk.alloc, allocator, descriptor_state->pds_info.entries); + pvr_bo_free(device, descriptor_state->static_consts); } static void pvr_pds_compute_program_setup( @@ -1014,6 +1066,7 @@ static VkResult pvr_compute_pipeline_compile( const VkAllocationCallbacks *const allocator, struct pvr_compute_pipeline *const compute_pipeline) { + struct rogue_compile_time_consts_data compile_time_consts_data; uint32_t work_group_input_regs[PVR_WORKGROUP_DIMENSIONS]; struct pvr_explicit_constant_usage explicit_const_usage; uint32_t local_input_regs[PVR_WORKGROUP_DIMENSIONS]; @@ -1065,12 +1118,12 @@ static VkResult pvr_compute_pipeline_compile( result = pvr_pds_descriptor_program_create_and_upload( device, allocator, + &compile_time_consts_data, &ubo_data, &explicit_const_usage, compute_pipeline->base.layout, PVR_STAGE_ALLOCATION_COMPUTE, - &compute_pipeline->state.descriptor.pds_code, - &compute_pipeline->state.descriptor.pds_info); + &compute_pipeline->state.descriptor); if (result != VK_SUCCESS) goto err_free_shader; @@ -1207,11 +1260,9 @@ static void pvr_compute_pipeline_destroy( allocator, &compute_pipeline->state.primary_program, &compute_pipeline->state.primary_program_info); - pvr_pds_descriptor_program_destroy( - device, - allocator, - &compute_pipeline->state.descriptor.pds_code, - &compute_pipeline->state.descriptor.pds_info); + pvr_pds_descriptor_program_destroy(device, + allocator, + &compute_pipeline->state.descriptor); pvr_bo_free(device, compute_pipeline->state.shader.bo); pvr_pipeline_finish(&compute_pipeline->base); @@ -1286,14 +1337,12 @@ pvr_graphics_pipeline_destroy(struct pvr_device *const device, pvr_pds_descriptor_program_destroy( device, allocator, - &gfx_pipeline->fragment_shader_state.descriptor_state.pds_code, - &gfx_pipeline->fragment_shader_state.descriptor_state.pds_info); + &gfx_pipeline->fragment_shader_state.descriptor_state); pvr_pds_descriptor_program_destroy( device, allocator, - &gfx_pipeline->vertex_shader_state.descriptor_state.pds_code, - &gfx_pipeline->vertex_shader_state.descriptor_state.pds_info); + &gfx_pipeline->vertex_shader_state.descriptor_state); for (uint32_t i = 0; i < num_vertex_attrib_programs; i++) { struct pvr_pds_attrib_program *const attrib_program = @@ -1587,12 +1636,12 @@ pvr_graphics_pipeline_compile(struct pvr_device *const device, result = pvr_pds_descriptor_program_create_and_upload( device, allocator, + &ctx->common_data[MESA_SHADER_VERTEX].compile_time_consts_data, &ctx->common_data[MESA_SHADER_VERTEX].ubo_data, &vert_explicit_const_usage, gfx_pipeline->base.layout, PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY, - &gfx_pipeline->vertex_shader_state.descriptor_state.pds_code, - &gfx_pipeline->vertex_shader_state.descriptor_state.pds_info); + &gfx_pipeline->vertex_shader_state.descriptor_state); if (result != VK_SUCCESS) goto err_free_vertex_attrib_program; @@ -1610,12 +1659,12 @@ pvr_graphics_pipeline_compile(struct pvr_device *const device, result = pvr_pds_descriptor_program_create_and_upload( device, allocator, + &ctx->common_data[MESA_SHADER_FRAGMENT].compile_time_consts_data, &ctx->common_data[MESA_SHADER_FRAGMENT].ubo_data, &frag_explicit_const_usage, gfx_pipeline->base.layout, PVR_STAGE_ALLOCATION_FRAGMENT, - &gfx_pipeline->fragment_shader_state.descriptor_state.pds_code, - &gfx_pipeline->fragment_shader_state.descriptor_state.pds_info); + &gfx_pipeline->fragment_shader_state.descriptor_state); if (result != VK_SUCCESS) goto err_free_vertex_descriptor_program; @@ -1629,8 +1678,7 @@ err_free_vertex_descriptor_program: pvr_pds_descriptor_program_destroy( device, allocator, - &gfx_pipeline->vertex_shader_state.descriptor_state.pds_code, - &gfx_pipeline->vertex_shader_state.descriptor_state.pds_info); + &gfx_pipeline->vertex_shader_state.descriptor_state); err_free_vertex_attrib_program: for (uint32_t i = 0; i < ARRAY_SIZE(gfx_pipeline->vertex_shader_state.pds_attrib_programs); diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h index 59db32b..39850b3 100644 --- a/src/imagination/vulkan/pvr_private.h +++ b/src/imagination/vulkan/pvr_private.h @@ -970,6 +970,9 @@ struct pvr_stage_allocation_descriptor_state { * referring to the code upload. */ struct pvr_pds_info pds_info; + + /* Already setup compile time static consts. */ + struct pvr_bo *static_consts; }; struct pvr_pds_attrib_program { -- 2.7.4