From 8b319c6db8bd93603b18bd783eb75225fcfd51b7 Mon Sep 17 00:00:00 2001
From: Eric Engestrom
Date: Fri, 26 May 2023 12:56:20 +0100
Subject: [PATCH] radv: reformat according to its .clang-format
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Signed-off-by: Eric Engestrom
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/vulkan/bvh/build_helpers.h | 47 +-
 src/amd/vulkan/bvh/build_interface.h | 2 +-
 src/amd/vulkan/bvh/bvh.h | 6 +-
 src/amd/vulkan/layers/radv_rage2.c | 5 +-
 src/amd/vulkan/layers/radv_rmv_layer.c | 12 +-
 src/amd/vulkan/layers/radv_rra_layer.c | 58 +-
 src/amd/vulkan/layers/radv_sqtt_layer.c | 320 ++-
 src/amd/vulkan/meta/radv_meta.c | 57 +-
 src/amd/vulkan/meta/radv_meta.h | 66 +-
 src/amd/vulkan/meta/radv_meta_blit.c | 234 +-
 src/amd/vulkan/meta/radv_meta_blit2d.c | 428 ++--
 src/amd/vulkan/meta/radv_meta_buffer.c | 115 +-
 src/amd/vulkan/meta/radv_meta_bufimage.c | 873 ++++---
 src/amd/vulkan/meta/radv_meta_clear.c | 825 +++----
 src/amd/vulkan/meta/radv_meta_copy.c | 155 +-
 src/amd/vulkan/meta/radv_meta_copy_vrs_htile.c | 136 +-
 src/amd/vulkan/meta/radv_meta_dcc_retile.c | 109 +-
 src/amd/vulkan/meta/radv_meta_decompress.c | 208 +-
 src/amd/vulkan/meta/radv_meta_etc_decode.c | 402 ++--
 src/amd/vulkan/meta/radv_meta_fast_clear.c | 396 ++--
 src/amd/vulkan/meta/radv_meta_fmask_copy.c | 149 +-
 src/amd/vulkan/meta/radv_meta_fmask_expand.c | 105 +-
 src/amd/vulkan/meta/radv_meta_resolve.c | 177 +-
 src/amd/vulkan/meta/radv_meta_resolve_cs.c | 359 ++-
 src/amd/vulkan/meta/radv_meta_resolve_fs.c | 249 +-
 src/amd/vulkan/nir/radv_nir.h | 15 +-
 .../vulkan/nir/radv_nir_apply_pipeline_layout.c | 101 +-
 src/amd/vulkan/nir/radv_nir_lower_abi.c | 114 +-
 src/amd/vulkan/nir/radv_nir_lower_fs_barycentric.c | 36 +-
 src/amd/vulkan/nir/radv_nir_lower_fs_intrinsics.c | 33 +-
 src/amd/vulkan/nir/radv_nir_lower_io.c | 31 +-
 .../nir/radv_nir_lower_primitive_shading_rate.c | 6 +-
 src/amd/vulkan/nir/radv_nir_lower_ray_queries.c | 227 +-
 src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c | 57 +-
 src/amd/vulkan/radv_acceleration_structure.c | 453 ++--
 src/amd/vulkan/radv_aco_shader_info.h | 16 +-
 src/amd/vulkan/radv_android.c | 89 +-
 src/amd/vulkan/radv_buffer.c | 50 +-
 src/amd/vulkan/radv_cmd_buffer.c | 2403 ++++++++------------
 src/amd/vulkan/radv_constants.h | 13 +-
 src/amd/vulkan/radv_cp_reg_shadowing.c | 22 +-
 src/amd/vulkan/radv_cs.h | 12 +-
 src/amd/vulkan/radv_debug.c | 166 +-
 src/amd/vulkan/radv_descriptor_set.c | 433 ++--
 src/amd/vulkan/radv_descriptor_set.h | 9 +-
 src/amd/vulkan/radv_device.c | 343 ++-
 src/amd/vulkan/radv_device_generated_commands.c | 511 ++---
 src/amd/vulkan/radv_device_memory.c | 60 +-
 src/amd/vulkan/radv_event.c | 17 +-
 src/amd/vulkan/radv_formats.c | 300 +--
 src/amd/vulkan/radv_image.c | 596 ++---
 src/amd/vulkan/radv_instance.c | 160 +-
 src/amd/vulkan/radv_llvm_helper.cpp | 10 +-
 src/amd/vulkan/radv_llvm_helper.h | 3 +-
 src/amd/vulkan/radv_nir_to_llvm.c | 85 +-
 src/amd/vulkan/radv_perfcounter.c | 265 +--
 src/amd/vulkan/radv_physical_device.c | 338 ++-
 src/amd/vulkan/radv_pipeline.c | 129 +-
 src/amd/vulkan/radv_pipeline_cache.c | 137 +-
 src/amd/vulkan/radv_pipeline_compute.c | 81 +-
 src/amd/vulkan/radv_pipeline_graphics.c | 991 +++-----
 src/amd/vulkan/radv_pipeline_rt.c | 141 +-
 src/amd/vulkan/radv_private.h | 688 +++---
 src/amd/vulkan/radv_query.c | 390 ++--
 src/amd/vulkan/radv_queue.c | 397 ++--
 src/amd/vulkan/radv_radeon_winsys.h | 64 +-
 src/amd/vulkan/radv_rmv.c | 114 +-
 src/amd/vulkan/radv_rra.c | 142 +-
 src/amd/vulkan/radv_rt_common.c | 348 ++-
src/amd/vulkan/radv_rt_common.h | 23 +- src/amd/vulkan/radv_rt_shader.c | 425 ++-- src/amd/vulkan/radv_sampler.c | 78 +- src/amd/vulkan/radv_sdma_copy_image.c | 49 +- src/amd/vulkan/radv_shader.c | 411 ++-- src/amd/vulkan/radv_shader.h | 99 +- src/amd/vulkan/radv_shader_args.c | 75 +- src/amd/vulkan/radv_shader_args.h | 3 +- src/amd/vulkan/radv_shader_info.c | 289 +-- src/amd/vulkan/radv_spm.c | 70 +- src/amd/vulkan/radv_sqtt.c | 208 +- src/amd/vulkan/radv_video.c | 501 ++-- src/amd/vulkan/radv_wsi.c | 12 +- src/amd/vulkan/si_cmd_buffer.c | 458 ++-- src/amd/vulkan/vk_format.h | 6 +- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 101 +- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 183 +- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c | 5 +- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c | 9 +- .../winsys/amdgpu/radv_amdgpu_winsys_public.h | 3 +- src/amd/vulkan/winsys/null/radv_null_bo.c | 7 +- src/amd/vulkan/winsys/null/radv_null_cs.c | 3 +- src/amd/vulkan/winsys/null/radv_null_winsys.c | 14 +- 92 files changed, 7760 insertions(+), 11361 deletions(-) diff --git a/src/amd/vulkan/bvh/build_helpers.h b/src/amd/vulkan/bvh/build_helpers.h index bf527ce..30d3127 100644 --- a/src/amd/vulkan/bvh/build_helpers.h +++ b/src/amd/vulkan/bvh/build_helpers.h @@ -162,10 +162,10 @@ #define VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR 4 #define VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR 8 -#define TYPE(type, align) \ - layout(buffer_reference, buffer_reference_align = align, scalar) buffer type##_ref \ - { \ - type value; \ +#define TYPE(type, align) \ + layout(buffer_reference, buffer_reference_align = align, scalar) buffer type##_ref \ + { \ + type value; \ }; #define REF(type) type##_ref @@ -173,7 +173,7 @@ #define NULL 0 #define DEREF(var) var.value -#define SIZEOF(type) uint32_t(uint64_t(REF(type)(uint64_t(0))+1)) +#define SIZEOF(type) uint32_t(uint64_t(REF(type)(uint64_t(0)) + 1)) #define OFFSET(ptr, offset) (uint64_t(ptr) + offset) @@ -323,10 +323,10 @@ calculate_instance_node_bounds(uint64_t base_ptr, mat3x4 otw_matrix) aabb.min[comp] = otw_matrix[comp][3]; aabb.max[comp] = otw_matrix[comp][3]; for (uint32_t col = 0; col < 3; ++col) { - aabb.min[comp] += min(otw_matrix[comp][col] * header.aabb.min[col], - otw_matrix[comp][col] * header.aabb.max[col]); - aabb.max[comp] += max(otw_matrix[comp][col] * header.aabb.min[col], - otw_matrix[comp][col] * header.aabb.max[col]); + aabb.min[comp] += + min(otw_matrix[comp][col] * header.aabb.min[col], otw_matrix[comp][col] * header.aabb.max[col]); + aabb.max[comp] += + max(otw_matrix[comp][col] * header.aabb.min[col], otw_matrix[comp][col] * header.aabb.max[col]); } } return aabb; @@ -400,9 +400,8 @@ fetch_task(REF(radv_ir_header) header, bool did_work) do { /* Perform a memory barrier to refresh the current phase's end counter, in case * another workgroup changed it. */ - memoryBarrier( - gl_ScopeDevice, gl_StorageSemanticsBuffer, - gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible); + memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer, + gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible); /* The first invocation of the first workgroup in a new phase is responsible to initiate the * switch to a new phase. It is only possible to switch to a new phase if all tasks of the @@ -410,22 +409,18 @@ fetch_task(REF(radv_ir_header) header, bool did_work) * end counter in turn notifies all invocations for that phase that it is safe to execute. 
*/ if (global_task_index == DEREF(header).sync_data.current_phase_end_counter && - DEREF(header).sync_data.task_done_counter == - DEREF(header).sync_data.current_phase_end_counter) { + DEREF(header).sync_data.task_done_counter == DEREF(header).sync_data.current_phase_end_counter) { if (DEREF(header).sync_data.next_phase_exit_flag != 0) { DEREF(header).sync_data.phase_index = TASK_INDEX_INVALID; - memoryBarrier( - gl_ScopeDevice, gl_StorageSemanticsBuffer, - gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible); + memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer, + gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible); } else { atomicAdd(DEREF(header).sync_data.phase_index, 1); - DEREF(header).sync_data.current_phase_start_counter = - DEREF(header).sync_data.current_phase_end_counter; + DEREF(header).sync_data.current_phase_start_counter = DEREF(header).sync_data.current_phase_end_counter; /* Ensure the changes to the phase index and start/end counter are visible for other * workgroup waiting in the loop. */ - memoryBarrier( - gl_ScopeDevice, gl_StorageSemanticsBuffer, - gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible); + memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer, + gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible); atomicAdd(DEREF(header).sync_data.current_phase_end_counter, DIV_ROUND_UP(task_count(header), gl_WorkGroupSize.x)); } @@ -447,8 +442,7 @@ fetch_task(REF(radv_ir_header) header, bool did_work) num_tasks_to_skip = shared_phase_index - phase_index; - uint32_t local_task_index = - global_task_index - DEREF(header).sync_data.current_phase_start_counter; + uint32_t local_task_index = global_task_index - DEREF(header).sync_data.current_phase_start_counter; return local_task_index * gl_WorkGroupSize.x + gl_LocalInvocationID.x; } @@ -464,9 +458,8 @@ should_execute_phase() return true; } -#define PHASE(header) \ - for (; task_index != TASK_INDEX_INVALID && should_execute_phase(); \ - task_index = fetch_task(header, true)) +#define PHASE(header) \ + for (; task_index != TASK_INDEX_INVALID && should_execute_phase(); task_index = fetch_task(header, true)) #endif #endif diff --git a/src/amd/vulkan/bvh/build_interface.h b/src/amd/vulkan/bvh/build_interface.h index 3319e16..6a1ee79 100644 --- a/src/amd/vulkan/bvh/build_interface.h +++ b/src/amd/vulkan/bvh/build_interface.h @@ -58,7 +58,7 @@ struct morton_args { }; #define LBVH_RIGHT_CHILD_BIT_SHIFT 29 -#define LBVH_RIGHT_CHILD_BIT (1 << LBVH_RIGHT_CHILD_BIT_SHIFT) +#define LBVH_RIGHT_CHILD_BIT (1 << LBVH_RIGHT_CHILD_BIT_SHIFT) struct lbvh_node_info { /* Number of children that have been processed (or are invalid/leaves) in diff --git a/src/amd/vulkan/bvh/bvh.h b/src/amd/vulkan/bvh/bvh.h index 50ba329..3687cca 100644 --- a/src/amd/vulkan/bvh/bvh.h +++ b/src/amd/vulkan/bvh/bvh.h @@ -28,12 +28,12 @@ #define radv_bvh_node_box16 4 #define radv_bvh_node_box32 5 #define radv_bvh_node_instance 6 -#define radv_bvh_node_aabb 7 +#define radv_bvh_node_aabb 7 #define radv_ir_node_triangle 0 #define radv_ir_node_internal 1 #define radv_ir_node_instance 2 -#define radv_ir_node_aabb 3 +#define radv_ir_node_aabb 3 #define RADV_GEOMETRY_OPAQUE (1u << 31) @@ -216,7 +216,7 @@ struct radv_bvh_box32_node { uint32_t reserved[4]; }; -#define RADV_BVH_ROOT_NODE radv_bvh_node_box32 +#define RADV_BVH_ROOT_NODE radv_bvh_node_box32 #define RADV_BVH_INVALID_NODE 0xffffffffu /* If the task index is set to this value, there is no diff 
--git a/src/amd/vulkan/layers/radv_rage2.c b/src/amd/vulkan/layers/radv_rage2.c index 036bd9b..471b23e 100644 --- a/src/amd/vulkan/layers/radv_rage2.c +++ b/src/amd/vulkan/layers/radv_rage2.c @@ -22,12 +22,11 @@ */ #include "radv_private.h" -#include "vk_framebuffer.h" #include "vk_common_entrypoints.h" +#include "vk_framebuffer.h" VKAPI_ATTR void VKAPI_CALL -rage2_CmdBeginRenderPass(VkCommandBuffer commandBuffer, - const VkRenderPassBeginInfo* pRenderPassBegin, +rage2_CmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo *pRenderPassBegin, VkSubpassContents contents) { VK_FROM_HANDLE(vk_framebuffer, framebuffer, pRenderPassBegin->framebuffer); diff --git a/src/amd/vulkan/layers/radv_rmv_layer.c b/src/amd/vulkan/layers/radv_rmv_layer.c index 6fd48bd..a7b7266 100644 --- a/src/amd/vulkan/layers/radv_rmv_layer.c +++ b/src/amd/vulkan/layers/radv_rmv_layer.c @@ -48,13 +48,11 @@ rmv_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo) } VKAPI_ATTR VkResult VKAPI_CALL -rmv_FlushMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount, - const VkMappedMemoryRange *pMemoryRanges) +rmv_FlushMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount, const VkMappedMemoryRange *pMemoryRanges) { RADV_FROM_HANDLE(radv_device, device, _device); - VkResult res = - device->layer_dispatch.rmv.FlushMappedMemoryRanges(_device, memoryRangeCount, pMemoryRanges); + VkResult res = device->layer_dispatch.rmv.FlushMappedMemoryRanges(_device, memoryRangeCount, pMemoryRanges); if (res != VK_SUCCESS || !device->vk.memory_trace_data.is_enabled) return res; @@ -64,13 +62,11 @@ rmv_FlushMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount, } VKAPI_ATTR VkResult VKAPI_CALL -rmv_InvalidateMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount, - const VkMappedMemoryRange *pMemoryRanges) +rmv_InvalidateMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount, const VkMappedMemoryRange *pMemoryRanges) { RADV_FROM_HANDLE(radv_device, device, _device); - VkResult res = device->layer_dispatch.rmv.InvalidateMappedMemoryRanges(_device, memoryRangeCount, - pMemoryRanges); + VkResult res = device->layer_dispatch.rmv.InvalidateMappedMemoryRanges(_device, memoryRangeCount, pMemoryRanges); if (res != VK_SUCCESS || !device->vk.memory_trace_data.is_enabled) return res; diff --git a/src/amd/vulkan/layers/radv_rra_layer.c b/src/amd/vulkan/layers/radv_rra_layer.c index c09d586..bb5abf8 100644 --- a/src/amd/vulkan/layers/radv_rra_layer.c +++ b/src/amd/vulkan/layers/radv_rra_layer.c @@ -21,8 +21,8 @@ * IN THE SOFTWARE. */ -#include "util/u_process.h" #include "meta/radv_meta.h" +#include "util/u_process.h" #include "radv_private.h" #include "vk_acceleration_structure.h" #include "vk_common_entrypoints.h" @@ -37,15 +37,13 @@ radv_rra_handle_trace(VkQueue _queue) /* * TODO: This code is shared with RGP tracing and could be merged in a common helper. 
*/ - bool frame_trigger = - queue->device->rra_trace.elapsed_frames == queue->device->rra_trace.trace_frame; + bool frame_trigger = queue->device->rra_trace.elapsed_frames == queue->device->rra_trace.trace_frame; if (queue->device->rra_trace.elapsed_frames <= queue->device->rra_trace.trace_frame) ++queue->device->rra_trace.elapsed_frames; bool file_trigger = false; #ifndef _WIN32 - if (queue->device->rra_trace.trigger_file && - access(queue->device->rra_trace.trigger_file, W_OK) == 0) { + if (queue->device->rra_trace.trigger_file && access(queue->device->rra_trace.trigger_file, W_OK) == 0) { if (unlink(queue->device->rra_trace.trigger_file) == 0) { file_trigger = true; } else { @@ -74,9 +72,8 @@ radv_rra_handle_trace(VkQueue _queue) t = time(NULL); now = *localtime(&t); - snprintf(filename, sizeof(filename), "/tmp/%s_%04d.%02d.%02d_%02d.%02d.%02d.rra", - util_get_process_name(), 1900 + now.tm_year, now.tm_mon + 1, now.tm_mday, now.tm_hour, - now.tm_min, now.tm_sec); + snprintf(filename, sizeof(filename), "/tmp/%s_%04d.%02d.%02d_%02d.%02d.%02d.rra", util_get_process_name(), + 1900 + now.tm_year, now.tm_mon + 1, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec); VkResult result = radv_rra_dump_trace(_queue, filename); @@ -159,16 +156,15 @@ fail_buffer: } VKAPI_ATTR VkResult VKAPI_CALL -rra_CreateAccelerationStructureKHR(VkDevice _device, - const VkAccelerationStructureCreateInfoKHR *pCreateInfo, +rra_CreateAccelerationStructureKHR(VkDevice _device, const VkAccelerationStructureCreateInfoKHR *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkAccelerationStructureKHR *pAccelerationStructure) { RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer); - VkResult result = device->layer_dispatch.rra.CreateAccelerationStructureKHR( - _device, pCreateInfo, pAllocator, pAccelerationStructure); + VkResult result = device->layer_dispatch.rra.CreateAccelerationStructureKHR(_device, pCreateInfo, pAllocator, + pAccelerationStructure); if (result != VK_SUCCESS) return result; @@ -212,8 +208,7 @@ fail_event: fail_data: free(data); fail_as: - device->layer_dispatch.rra.DestroyAccelerationStructureKHR(_device, *pAccelerationStructure, - pAllocator); + device->layer_dispatch.rra.DestroyAccelerationStructureKHR(_device, *pAccelerationStructure, pAllocator); *pAccelerationStructure = VK_NULL_HANDLE; exit: simple_mtx_unlock(&device->rra_trace.data_mtx); @@ -221,8 +216,7 @@ exit: } static void -handle_accel_struct_write(VkCommandBuffer commandBuffer, - struct vk_acceleration_structure *accel_struct, +handle_accel_struct_write(VkCommandBuffer commandBuffer, struct vk_acceleration_structure *accel_struct, struct radv_rra_accel_struct_data *data) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); @@ -247,8 +241,7 @@ handle_accel_struct_write(VkCommandBuffer commandBuffer, if (!data->va) { data->va = vk_acceleration_structure_get_va(accel_struct); - _mesa_hash_table_u64_insert(cmd_buffer->device->rra_trace.accel_struct_vas, data->va, - accel_struct); + _mesa_hash_table_u64_insert(cmd_buffer->device->rra_trace.accel_struct_vas, data->va, accel_struct); } if (!data->buffer) @@ -272,20 +265,18 @@ handle_accel_struct_write(VkCommandBuffer commandBuffer, } VKAPI_ATTR void VKAPI_CALL -rra_CmdBuildAccelerationStructuresKHR( - VkCommandBuffer commandBuffer, uint32_t infoCount, - const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, - const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos) 
+rra_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t infoCount, + const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, + const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - cmd_buffer->device->layer_dispatch.rra.CmdBuildAccelerationStructuresKHR( - commandBuffer, infoCount, pInfos, ppBuildRangeInfos); + cmd_buffer->device->layer_dispatch.rra.CmdBuildAccelerationStructuresKHR(commandBuffer, infoCount, pInfos, + ppBuildRangeInfos); simple_mtx_lock(&cmd_buffer->device->rra_trace.data_mtx); for (uint32_t i = 0; i < infoCount; ++i) { RADV_FROM_HANDLE(vk_acceleration_structure, structure, pInfos[i].dstAccelerationStructure); - struct hash_entry *entry = _mesa_hash_table_search( - cmd_buffer->device->rra_trace.accel_structs, structure); + struct hash_entry *entry = _mesa_hash_table_search(cmd_buffer->device->rra_trace.accel_structs, structure); assert(entry); struct radv_rra_accel_struct_data *data = entry->data; @@ -296,8 +287,7 @@ rra_CmdBuildAccelerationStructuresKHR( } VKAPI_ATTR void VKAPI_CALL -rra_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, - const VkCopyAccelerationStructureInfoKHR *pInfo) +rra_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); cmd_buffer->device->layer_dispatch.rra.CmdCopyAccelerationStructureKHR(commandBuffer, pInfo); @@ -305,8 +295,7 @@ rra_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, simple_mtx_lock(&cmd_buffer->device->rra_trace.data_mtx); RADV_FROM_HANDLE(vk_acceleration_structure, structure, pInfo->dst); - struct hash_entry *entry = - _mesa_hash_table_search(cmd_buffer->device->rra_trace.accel_structs, structure); + struct hash_entry *entry = _mesa_hash_table_search(cmd_buffer->device->rra_trace.accel_structs, structure); assert(entry); struct radv_rra_accel_struct_data *data = entry->data; @@ -321,14 +310,12 @@ rra_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - cmd_buffer->device->layer_dispatch.rra.CmdCopyMemoryToAccelerationStructureKHR(commandBuffer, - pInfo); + cmd_buffer->device->layer_dispatch.rra.CmdCopyMemoryToAccelerationStructureKHR(commandBuffer, pInfo); simple_mtx_lock(&cmd_buffer->device->rra_trace.data_mtx); RADV_FROM_HANDLE(vk_acceleration_structure, structure, pInfo->dst); - struct hash_entry *entry = - _mesa_hash_table_search(cmd_buffer->device->rra_trace.accel_structs, structure); + struct hash_entry *entry = _mesa_hash_table_search(cmd_buffer->device->rra_trace.accel_structs, structure); assert(entry); struct radv_rra_accel_struct_data *data = entry->data; @@ -350,8 +337,7 @@ rra_DestroyAccelerationStructureKHR(VkDevice _device, VkAccelerationStructureKHR RADV_FROM_HANDLE(vk_acceleration_structure, structure, _structure); - struct hash_entry *entry = - _mesa_hash_table_search(device->rra_trace.accel_structs, structure); + struct hash_entry *entry = _mesa_hash_table_search(device->rra_trace.accel_structs, structure); assert(entry); struct radv_rra_accel_struct_data *data = entry->data; diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c b/src/amd/vulkan/layers/radv_sqtt_layer.c index acad0f4..7ea88f3 100644 --- a/src/amd/vulkan/layers/radv_sqtt_layer.c +++ b/src/amd/vulkan/layers/radv_sqtt_layer.c @@ -21,18 +21,17 @@ * IN 
THE SOFTWARE. */ -#include "vk_common_entrypoints.h" -#include "wsi_common_entrypoints.h" #include "radv_cs.h" #include "radv_private.h" #include "radv_shader.h" +#include "vk_common_entrypoints.h" +#include "wsi_common_entrypoints.h" #include "ac_rgp.h" #include "ac_sqtt.h" void -radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, - struct radv_graphics_pipeline *pipeline) +radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv_graphics_pipeline *pipeline) { const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level; struct radv_sqtt_shaders_reloc *reloc = pipeline->sqtt_shaders_reloc; @@ -141,8 +140,7 @@ radv_sqtt_shader_get_va_reloc(struct radv_pipeline *pipeline, gl_shader_stage st } static VkResult -radv_sqtt_reloc_graphics_shaders(struct radv_device *device, - struct radv_graphics_pipeline *pipeline) +radv_sqtt_reloc_graphics_shaders(struct radv_device *device, struct radv_graphics_pipeline *pipeline) { struct radv_shader_dma_submission *submission = NULL; struct radv_sqtt_shaders_reloc *reloc; @@ -176,8 +174,7 @@ radv_sqtt_reloc_graphics_shaders(struct radv_device *device, uint64_t offset = 0; if (device->shader_use_invisible_vram) { - submission = - radv_shader_dma_get_submission(device, reloc->bo, slab_va, code_size); + submission = radv_shader_dma_get_submission(device, reloc->bo, slab_va, code_size); if (!submission) return VK_ERROR_UNKNOWN; } @@ -211,8 +208,7 @@ radv_sqtt_reloc_graphics_shaders(struct radv_device *device, } static void -radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer, - enum rgp_sqtt_marker_general_api_type api_type) +radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_general_api_type api_type) { struct rgp_sqtt_marker_general_api marker = {0}; @@ -223,8 +219,7 @@ radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer, } static void -radv_write_end_general_api_marker(struct radv_cmd_buffer *cmd_buffer, - enum rgp_sqtt_marker_general_api_type api_type) +radv_write_end_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_general_api_type api_type) { struct rgp_sqtt_marker_general_api marker = {0}; @@ -236,9 +231,9 @@ radv_write_end_general_api_marker(struct radv_cmd_buffer *cmd_buffer, } static void -radv_write_event_marker(struct radv_cmd_buffer *cmd_buffer, - enum rgp_sqtt_marker_event_type api_type, uint32_t vertex_offset_user_data, - uint32_t instance_offset_user_data, uint32_t draw_index_user_data) +radv_write_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_event_type api_type, + uint32_t vertex_offset_user_data, uint32_t instance_offset_user_data, + uint32_t draw_index_user_data) { struct rgp_sqtt_marker_event marker = {0}; @@ -263,9 +258,8 @@ radv_write_event_marker(struct radv_cmd_buffer *cmd_buffer, } static void -radv_write_event_with_dims_marker(struct radv_cmd_buffer *cmd_buffer, - enum rgp_sqtt_marker_event_type api_type, uint32_t x, uint32_t y, - uint32_t z) +radv_write_event_with_dims_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_event_type api_type, + uint32_t x, uint32_t y, uint32_t z) { struct rgp_sqtt_marker_event_with_dims marker = {0}; @@ -283,8 +277,8 @@ radv_write_event_with_dims_marker(struct radv_cmd_buffer *cmd_buffer, } static void -radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer, - enum rgp_sqtt_marker_user_event_type type, const char *str) +radv_write_user_event_marker(struct radv_cmd_buffer 
*cmd_buffer, enum rgp_sqtt_marker_user_event_type type, + const char *str) { if (type == UserEventPop) { assert(str == NULL); @@ -320,10 +314,8 @@ radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) return; /* Reserve a command buffer ID for SQTT. */ - enum amd_ip_type ip_type = - radv_queue_family_to_ring(cmd_buffer->device->physical_device, cmd_buffer->qf); - union rgp_sqtt_marker_cb_id cb_id = - ac_sqtt_get_next_cmdbuf_id(&cmd_buffer->device->sqtt, ip_type); + enum amd_ip_type ip_type = radv_queue_family_to_ring(cmd_buffer->device->physical_device, cmd_buffer->qf); + union rgp_sqtt_marker_cb_id cb_id = ac_sqtt_get_next_cmdbuf_id(&cmd_buffer->device->sqtt, ip_type); cmd_buffer->sqtt_cb_id = cb_id.all; marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START; @@ -362,8 +354,7 @@ radv_describe_draw(struct radv_cmd_buffer *cmd_buffer) if (likely(!cmd_buffer->device->sqtt.bo)) return; - radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX, - UINT_MAX); + radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX, UINT_MAX); } void @@ -376,12 +367,10 @@ radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z) } void -radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer, - VkImageAspectFlagBits aspects) +radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlagBits aspects) { - cmd_buffer->state.current_event_type = (aspects & VK_IMAGE_ASPECT_COLOR_BIT) - ? EventRenderPassColorClear - : EventRenderPassDepthStencilClear; + cmd_buffer->state.current_event_type = + (aspects & VK_IMAGE_ASPECT_COLOR_BIT) ? EventRenderPassColorClear : EventRenderPassDepthStencilClear; } void @@ -487,8 +476,7 @@ radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer) } void -radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, - const struct radv_barrier_data *barrier) +radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, const struct radv_barrier_data *barrier) { struct rgp_sqtt_marker_layout_transition marker = {0}; @@ -516,8 +504,8 @@ radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, } static void -radv_describe_pipeline_bind(struct radv_cmd_buffer *cmd_buffer, - VkPipelineBindPoint pipelineBindPoint, struct radv_pipeline *pipeline) +radv_describe_pipeline_bind(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint pipelineBindPoint, + struct radv_pipeline *pipeline) { struct rgp_sqtt_marker_pipeline_bind marker = {0}; @@ -622,66 +610,60 @@ sqtt_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo) return VK_SUCCESS; } -#define EVENT_MARKER_BASE(cmd_name, api_name, event_name, ...) \ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \ - radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \ - cmd_buffer->state.current_event_type = EventCmd##event_name; \ - cmd_buffer->device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__); \ - cmd_buffer->state.current_event_type = EventInternalUnknown; \ +#define EVENT_MARKER_BASE(cmd_name, api_name, event_name, ...) 
\ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \ + radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \ + cmd_buffer->state.current_event_type = EventCmd##event_name; \ + cmd_buffer->device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__); \ + cmd_buffer->state.current_event_type = EventInternalUnknown; \ radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name); -#define EVENT_MARKER_ALIAS(cmd_name, api_name, ...) \ - EVENT_MARKER_BASE(cmd_name, api_name, api_name, __VA_ARGS__); +#define EVENT_MARKER_ALIAS(cmd_name, api_name, ...) EVENT_MARKER_BASE(cmd_name, api_name, api_name, __VA_ARGS__); -#define EVENT_MARKER(cmd_name, ...) \ - EVENT_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__); +#define EVENT_MARKER(cmd_name, ...) EVENT_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__); VKAPI_ATTR void VKAPI_CALL -sqtt_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, - uint32_t firstVertex, uint32_t firstInstance) +sqtt_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, + uint32_t firstInstance) { EVENT_MARKER(Draw, commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, - uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance) +sqtt_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, + int32_t vertexOffset, uint32_t firstInstance) { - EVENT_MARKER(DrawIndexed, commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset, - firstInstance); + EVENT_MARKER(DrawIndexed, commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, - uint32_t drawCount, uint32_t stride) +sqtt_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, + uint32_t stride) { EVENT_MARKER(DrawIndirect, commandBuffer, buffer, offset, drawCount, stride); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, - uint32_t drawCount, uint32_t stride) +sqtt_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, + uint32_t stride) { EVENT_MARKER(DrawIndexedIndirect, commandBuffer, buffer, offset, drawCount, stride); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, - VkBuffer countBuffer, VkDeviceSize countBufferOffset, - uint32_t maxDrawCount, uint32_t stride) +sqtt_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, + VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride) { - EVENT_MARKER(DrawIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset, - maxDrawCount, stride); + EVENT_MARKER(DrawIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, - VkDeviceSize offset, VkBuffer countBuffer, - VkDeviceSize countBufferOffset, uint32_t maxDrawCount, +sqtt_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, + VkBuffer 
countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride) { - EVENT_MARKER(DrawIndexedIndirectCount, commandBuffer, buffer, offset, countBuffer, - countBufferOffset, maxDrawCount, stride); + EVENT_MARKER(DrawIndexedIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, + stride); } VKAPI_ATTR void VKAPI_CALL @@ -703,15 +685,15 @@ sqtt_CmdCopyBuffer2(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *pCop } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, - VkDeviceSize fillSize, uint32_t data) +sqtt_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize fillSize, + uint32_t data) { EVENT_MARKER(FillBuffer, commandBuffer, dstBuffer, dstOffset, fillSize, data); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, - VkDeviceSize dataSize, const void *pData) +sqtt_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, + const void *pData) { EVENT_MARKER(UpdateBuffer, commandBuffer, dstBuffer, dstOffset, dataSize, pData); } @@ -723,19 +705,15 @@ sqtt_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyI } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, - const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo) +sqtt_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo) { - EVENT_MARKER_ALIAS(CopyBufferToImage2, CopyBufferToImage, commandBuffer, - pCopyBufferToImageInfo); + EVENT_MARKER_ALIAS(CopyBufferToImage2, CopyBufferToImage, commandBuffer, pCopyBufferToImageInfo); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, - const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo) +sqtt_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo) { - EVENT_MARKER_ALIAS(CopyImageToBuffer2, CopyImageToBuffer, commandBuffer, - pCopyImageToBufferInfo); + EVENT_MARKER_ALIAS(CopyImageToBuffer2, CopyImageToBuffer, commandBuffer, pCopyImageToBufferInfo); } VKAPI_ATTR void VKAPI_CALL @@ -746,81 +724,70 @@ sqtt_CmdBlitImage2(VkCommandBuffer commandBuffer, const VkBlitImageInfo2 *pBlitI VKAPI_ATTR void VKAPI_CALL sqtt_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout, - const VkClearColorValue *pColor, uint32_t rangeCount, - const VkImageSubresourceRange *pRanges) + const VkClearColorValue *pColor, uint32_t rangeCount, const VkImageSubresourceRange *pRanges) { EVENT_MARKER(ClearColorImage, commandBuffer, image_h, imageLayout, pColor, rangeCount, pRanges); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h, - VkImageLayout imageLayout, +sqtt_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout, const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange *pRanges) { - EVENT_MARKER(ClearDepthStencilImage, commandBuffer, image_h, imageLayout, pDepthStencil, - rangeCount, pRanges); + EVENT_MARKER(ClearDepthStencilImage, commandBuffer, image_h, imageLayout, pDepthStencil, rangeCount, pRanges); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, - const 
VkClearAttachment *pAttachments, uint32_t rectCount, - const VkClearRect *pRects) +sqtt_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment *pAttachments, + uint32_t rectCount, const VkClearRect *pRects) { EVENT_MARKER(ClearAttachments, commandBuffer, attachmentCount, pAttachments, rectCount, pRects); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdResolveImage2(VkCommandBuffer commandBuffer, - const VkResolveImageInfo2 *pResolveImageInfo) +sqtt_CmdResolveImage2(VkCommandBuffer commandBuffer, const VkResolveImageInfo2 *pResolveImageInfo) { EVENT_MARKER_ALIAS(ResolveImage2, ResolveImage, commandBuffer, pResolveImageInfo); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, - const VkDependencyInfo* pDependencyInfos) +sqtt_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents, + const VkDependencyInfo *pDependencyInfos) { - EVENT_MARKER_ALIAS(WaitEvents2, WaitEvents, commandBuffer, eventCount, pEvents, - pDependencyInfos); + EVENT_MARKER_ALIAS(WaitEvents2, WaitEvents, commandBuffer, eventCount, pEvents, pDependencyInfos); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, - const VkDependencyInfo* pDependencyInfo) +sqtt_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, const VkDependencyInfo *pDependencyInfo) { EVENT_MARKER_ALIAS(PipelineBarrier2, PipelineBarrier, commandBuffer, pDependencyInfo); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, - uint32_t queryCount) +sqtt_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount) { EVENT_MARKER(ResetQueryPool, commandBuffer, queryPool, firstQuery, queryCount); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool, - uint32_t firstQuery, uint32_t queryCount, VkBuffer dstBuffer, - VkDeviceSize dstOffset, VkDeviceSize stride, VkQueryResultFlags flags) +sqtt_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, + uint32_t queryCount, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize stride, + VkQueryResultFlags flags) { - EVENT_MARKER(CopyQueryPoolResults, commandBuffer, queryPool, firstQuery, queryCount, dstBuffer, - dstOffset, stride, flags); + EVENT_MARKER(CopyQueryPoolResults, commandBuffer, queryPool, firstQuery, queryCount, dstBuffer, dstOffset, stride, + flags); } -#define EVENT_RT_MARKER(cmd_name, ...) \ - EVENT_MARKER_BASE(cmd_name, Dispatch, cmd_name, __VA_ARGS__); +#define EVENT_RT_MARKER(cmd_name, ...) EVENT_MARKER_BASE(cmd_name, Dispatch, cmd_name, __VA_ARGS__); -#define EVENT_RT_MARKER_ALIAS(cmd_name, event_name, ...) \ - EVENT_MARKER_BASE(cmd_name, Dispatch, event_name, __VA_ARGS__); +#define EVENT_RT_MARKER_ALIAS(cmd_name, event_name, ...) 
EVENT_MARKER_BASE(cmd_name, Dispatch, event_name, __VA_ARGS__); VKAPI_ATTR void VKAPI_CALL -sqtt_CmdTraceRaysKHR(VkCommandBuffer commandBuffer, - const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable, +sqtt_CmdTraceRaysKHR(VkCommandBuffer commandBuffer, const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable, const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable, const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable, - const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable, - uint32_t width, uint32_t height, uint32_t depth) + const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable, uint32_t width, + uint32_t height, uint32_t depth) { EVENT_RT_MARKER(TraceRaysKHR, commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, width, height, depth); @@ -834,16 +801,14 @@ sqtt_CmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer, const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable, VkDeviceAddress indirectDeviceAddress) { - EVENT_RT_MARKER(TraceRaysIndirectKHR, commandBuffer, pRaygenShaderBindingTable, - pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, - indirectDeviceAddress); + EVENT_RT_MARKER(TraceRaysIndirectKHR, commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable, + pHitShaderBindingTable, pCallableShaderBindingTable, indirectDeviceAddress); } VKAPI_ATTR void VKAPI_CALL sqtt_CmdTraceRaysIndirect2KHR(VkCommandBuffer commandBuffer, VkDeviceAddress indirectDeviceAddress) { - EVENT_RT_MARKER_ALIAS(TraceRaysIndirect2KHR, TraceRaysIndirectKHR, commandBuffer, - indirectDeviceAddress); + EVENT_RT_MARKER_ALIAS(TraceRaysIndirect2KHR, TraceRaysIndirectKHR, commandBuffer, indirectDeviceAddress); } VKAPI_ATTR void VKAPI_CALL @@ -851,13 +816,11 @@ sqtt_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t i const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos) { - EVENT_RT_MARKER(BuildAccelerationStructuresKHR, commandBuffer, infoCount, pInfos, - ppBuildRangeInfos); + EVENT_RT_MARKER(BuildAccelerationStructuresKHR, commandBuffer, infoCount, pInfos, ppBuildRangeInfos); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, - const VkCopyAccelerationStructureInfoKHR *pInfo) +sqtt_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo) { EVENT_RT_MARKER(CopyAccelerationStructureKHR, commandBuffer, pInfo); } @@ -883,20 +846,19 @@ sqtt_CmdDrawMeshTasksEXT(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer buffer, - VkDeviceSize offset, uint32_t drawCount, uint32_t stride) +sqtt_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, + uint32_t drawCount, uint32_t stride) { EVENT_MARKER(DrawMeshTasksIndirectEXT, commandBuffer, buffer, offset, drawCount, stride); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer buffer, - VkDeviceSize offset, VkBuffer countBuffer, - VkDeviceSize countBufferOffset, uint32_t maxDrawCount, +sqtt_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, + VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride) { - 
EVENT_MARKER(DrawMeshTasksIndirectCountEXT, commandBuffer, buffer, offset, countBuffer, - countBufferOffset, maxDrawCount, stride); + EVENT_MARKER(DrawMeshTasksIndirectCountEXT, commandBuffer, buffer, offset, countBuffer, countBufferOffset, + maxDrawCount, stride); } #undef EVENT_RT_MARKER_ALIAS @@ -906,17 +868,16 @@ sqtt_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer bu #undef EVENT_MARKER_ALIAS #undef EVENT_MARKER_BASE -#define API_MARKER_ALIAS(cmd_name, api_name, ...) \ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \ - radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \ - cmd_buffer->device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__); \ +#define API_MARKER_ALIAS(cmd_name, api_name, ...) \ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \ + radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \ + cmd_buffer->device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__); \ radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name); #define API_MARKER(cmd_name, ...) API_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__); VKAPI_ATTR void VKAPI_CALL -sqtt_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, - VkPipeline _pipeline) +sqtt_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline _pipeline) { RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); @@ -938,30 +899,27 @@ sqtt_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pi const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount, const uint32_t *pDynamicOffsets) { - API_MARKER(BindDescriptorSets, commandBuffer, pipelineBindPoint, layout, firstSet, - descriptorSetCount, pDescriptorSets, dynamicOffsetCount, pDynamicOffsets); + API_MARKER(BindDescriptorSets, commandBuffer, pipelineBindPoint, layout, firstSet, descriptorSetCount, + pDescriptorSets, dynamicOffsetCount, pDynamicOffsets); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, - VkIndexType indexType) +sqtt_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType) { API_MARKER(BindIndexBuffer, commandBuffer, buffer, offset, indexType); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, - uint32_t bindingCount, const VkBuffer *pBuffers, - const VkDeviceSize *pOffsets, const VkDeviceSize* pSizes, - const VkDeviceSize* pStrides) +sqtt_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount, + const VkBuffer *pBuffers, const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes, + const VkDeviceSize *pStrides) { - API_MARKER_ALIAS(BindVertexBuffers2, BindVertexBuffers, commandBuffer, firstBinding, - bindingCount, pBuffers, pOffsets, pSizes, pStrides); + API_MARKER_ALIAS(BindVertexBuffers2, BindVertexBuffers, commandBuffer, firstBinding, bindingCount, pBuffers, + pOffsets, pSizes, pStrides); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, - VkQueryControlFlags flags) +sqtt_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, VkQueryControlFlags flags) { API_MARKER(BeginQuery, commandBuffer, queryPool, query, flags); } @@ -973,23 +931,21 @@ sqtt_CmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t } VKAPI_ATTR void VKAPI_CALL 
-sqtt_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, - VkQueryPool queryPool, uint32_t query) +sqtt_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, VkQueryPool queryPool, + uint32_t query) { API_MARKER_ALIAS(WriteTimestamp2, WriteTimestamp, commandBuffer, stage, queryPool, query); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, - VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size, - const void *pValues) +sqtt_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, + uint32_t offset, uint32_t size, const void *pValues) { API_MARKER(PushConstants, commandBuffer, layout, stageFlags, offset, size, pValues); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdBeginRendering(VkCommandBuffer commandBuffer, - const VkRenderingInfo *pRenderingInfo) +sqtt_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRenderingInfo) { API_MARKER_ALIAS(BeginRendering, BeginRenderPass, commandBuffer, pRenderingInfo); } @@ -1001,8 +957,7 @@ sqtt_CmdEndRendering(VkCommandBuffer commandBuffer) } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, - const VkCommandBuffer *pCmdBuffers) +sqtt_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, const VkCommandBuffer *pCmdBuffers) { API_MARKER(ExecuteCommands, commandBuffer, commandBufferCount, pCmdBuffers); } @@ -1028,11 +983,10 @@ sqtt_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth) } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, - float depthBiasClamp, float depthBiasSlopeFactor) +sqtt_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, float depthBiasClamp, + float depthBiasSlopeFactor) { - API_MARKER(SetDepthBias, commandBuffer, depthBiasConstantFactor, depthBiasClamp, - depthBiasSlopeFactor); + API_MARKER(SetDepthBias, commandBuffer, depthBiasConstantFactor, depthBiasClamp, depthBiasSlopeFactor); } VKAPI_ATTR void VKAPI_CALL @@ -1048,30 +1002,26 @@ sqtt_CmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds, floa } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, - uint32_t compareMask) +sqtt_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t compareMask) { API_MARKER(SetStencilCompareMask, commandBuffer, faceMask, compareMask); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, - uint32_t writeMask) +sqtt_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t writeMask) { API_MARKER(SetStencilWriteMask, commandBuffer, faceMask, writeMask); } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, - uint32_t reference) +sqtt_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t reference) { API_MARKER(SetStencilReference, commandBuffer, faceMask, reference); } /* VK_EXT_debug_marker */ VKAPI_ATTR void VKAPI_CALL -sqtt_CmdDebugMarkerBeginEXT(VkCommandBuffer commandBuffer, - const VkDebugMarkerMarkerInfoEXT *pMarkerInfo) +sqtt_CmdDebugMarkerBeginEXT(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT *pMarkerInfo) { RADV_FROM_HANDLE(radv_cmd_buffer, 
cmd_buffer, commandBuffer); radv_write_user_event_marker(cmd_buffer, UserEventPush, pMarkerInfo->pMarkerName); @@ -1085,8 +1035,7 @@ sqtt_CmdDebugMarkerEndEXT(VkCommandBuffer commandBuffer) } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdDebugMarkerInsertEXT(VkCommandBuffer commandBuffer, - const VkDebugMarkerMarkerInfoEXT *pMarkerInfo) +sqtt_CmdDebugMarkerInsertEXT(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT *pMarkerInfo) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pMarkerInfo->pMarkerName); @@ -1107,8 +1056,7 @@ sqtt_DebugMarkerSetObjectTagEXT(VkDevice device, const VkDebugMarkerObjectTagInf } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, - const VkDebugUtilsLabelEXT *pLabelInfo) +sqtt_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT *pLabelInfo) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); radv_write_user_event_marker(cmd_buffer, UserEventPush, pLabelInfo->pLabelName); @@ -1126,8 +1074,7 @@ sqtt_CmdEndDebugUtilsLabelEXT(VkCommandBuffer commandBuffer) } VKAPI_ATTR void VKAPI_CALL -sqtt_CmdInsertDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, - const VkDebugUtilsLabelEXT *pLabelInfo) +sqtt_CmdInsertDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT *pLabelInfo) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pLabelInfo->pLabelName); @@ -1272,9 +1219,7 @@ radv_unregister_pipeline(struct radv_device *device, struct radv_pipeline *pipel /* Destroy the PSO correlation record. */ simple_mtx_lock(&pso_correlation->lock); - list_for_each_entry_safe(struct rgp_pso_correlation_record, record, &pso_correlation->record, - list) - { + list_for_each_entry_safe (struct rgp_pso_correlation_record, record, &pso_correlation->record, list) { if (record->pipeline_hash[0] == pipeline->pipeline_hash) { pso_correlation->record_count--; list_del(&record->list); @@ -1286,8 +1231,7 @@ radv_unregister_pipeline(struct radv_device *device, struct radv_pipeline *pipel /* Destroy the code object loader record. */ simple_mtx_lock(&loader_events->lock); - list_for_each_entry_safe(struct rgp_loader_events_record, record, &loader_events->record, list) - { + list_for_each_entry_safe (struct rgp_loader_events_record, record, &loader_events->record, list) { if (record->code_object_hash[0] == pipeline->pipeline_hash) { loader_events->record_count--; list_del(&record->list); @@ -1299,8 +1243,7 @@ radv_unregister_pipeline(struct radv_device *device, struct radv_pipeline *pipel /* Destroy the code object record. 
*/ simple_mtx_lock(&code_object->lock); - list_for_each_entry_safe(struct rgp_code_object_record, record, &code_object->record, list) - { + list_for_each_entry_safe (struct rgp_code_object_record, record, &code_object->record, list) { if (record->pipeline_hash[0] == pipeline->pipeline_hash) { code_object->record_count--; list_del(&record->list); @@ -1313,14 +1256,14 @@ radv_unregister_pipeline(struct radv_device *device, struct radv_pipeline *pipel VKAPI_ATTR VkResult VKAPI_CALL sqtt_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count, - const VkGraphicsPipelineCreateInfo *pCreateInfos, - const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines) + const VkGraphicsPipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipelines) { RADV_FROM_HANDLE(radv_device, device, _device); VkResult result; - result = device->layer_dispatch.rgp.CreateGraphicsPipelines( - _device, pipelineCache, count, pCreateInfos, pAllocator, pPipelines); + result = device->layer_dispatch.rgp.CreateGraphicsPipelines(_device, pipelineCache, count, pCreateInfos, pAllocator, + pPipelines); if (result != VK_SUCCESS) return result; @@ -1354,14 +1297,14 @@ fail: VKAPI_ATTR VkResult VKAPI_CALL sqtt_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count, - const VkComputePipelineCreateInfo *pCreateInfos, - const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines) + const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipelines) { RADV_FROM_HANDLE(radv_device, device, _device); VkResult result; - result = device->layer_dispatch.rgp.CreateComputePipelines(_device, pipelineCache, count, - pCreateInfos, pAllocator, pPipelines); + result = device->layer_dispatch.rgp.CreateComputePipelines(_device, pipelineCache, count, pCreateInfos, pAllocator, + pPipelines); if (result != VK_SUCCESS) return result; @@ -1395,8 +1338,8 @@ sqtt_CreateRayTracingPipelinesKHR(VkDevice _device, VkDeferredOperationKHR defer RADV_FROM_HANDLE(radv_device, device, _device); VkResult result; - result = device->layer_dispatch.rgp.CreateRayTracingPipelinesKHR( - _device, deferredOperation, pipelineCache, count, pCreateInfos, pAllocator, pPipelines); + result = device->layer_dispatch.rgp.CreateRayTracingPipelinesKHR(_device, deferredOperation, pipelineCache, count, + pCreateInfos, pAllocator, pPipelines); if (result != VK_SUCCESS) return result; @@ -1425,8 +1368,7 @@ fail: } VKAPI_ATTR void VKAPI_CALL -sqtt_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, - const VkAllocationCallbacks *pAllocator) +sqtt_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, const VkAllocationCallbacks *pAllocator) { RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); diff --git a/src/amd/vulkan/meta/radv_meta.c b/src/amd/vulkan/meta/radv_meta.c index 9441f2c..db709d2 100644 --- a/src/amd/vulkan/meta/radv_meta.c +++ b/src/amd/vulkan/meta/radv_meta.c @@ -115,14 +115,11 @@ radv_resume_queries(const struct radv_meta_saved_state *state, struct radv_cmd_b } void -radv_meta_save(struct radv_meta_saved_state *state, struct radv_cmd_buffer *cmd_buffer, - uint32_t flags) +radv_meta_save(struct radv_meta_saved_state *state, struct radv_cmd_buffer *cmd_buffer, uint32_t flags) { - VkPipelineBindPoint bind_point = flags & RADV_META_SAVE_GRAPHICS_PIPELINE - ? 
VK_PIPELINE_BIND_POINT_GRAPHICS - : VK_PIPELINE_BIND_POINT_COMPUTE; - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, bind_point); + VkPipelineBindPoint bind_point = + flags & RADV_META_SAVE_GRAPHICS_PIPELINE ? VK_PIPELINE_BIND_POINT_GRAPHICS : VK_PIPELINE_BIND_POINT_COMPUTE; + struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point); assert(flags & (RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_COMPUTE_PIPELINE)); @@ -172,9 +169,8 @@ radv_meta_save(struct radv_meta_saved_state *state, struct radv_cmd_buffer *cmd_ void radv_meta_restore(const struct radv_meta_saved_state *state, struct radv_cmd_buffer *cmd_buffer) { - VkPipelineBindPoint bind_point = state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE - ? VK_PIPELINE_BIND_POINT_GRAPHICS - : VK_PIPELINE_BIND_POINT_COMPUTE; + VkPipelineBindPoint bind_point = state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE ? VK_PIPELINE_BIND_POINT_GRAPHICS + : VK_PIPELINE_BIND_POINT_COMPUTE; if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) { if (state->old_graphics_pipeline) { @@ -211,8 +207,8 @@ radv_meta_restore(const struct radv_meta_saved_state *state, struct radv_cmd_buf if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) stages |= VK_SHADER_STAGE_ALL_GRAPHICS; - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), VK_NULL_HANDLE, stages, 0, - MAX_PUSH_CONSTANTS_SIZE, state->push_constants); + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), VK_NULL_HANDLE, stages, 0, MAX_PUSH_CONSTANTS_SIZE, + state->push_constants); } if (state->flags & RADV_META_SAVE_RENDER) { @@ -246,8 +242,7 @@ radv_meta_get_view_type(const struct radv_image *image) * VkImageViewCreateInfo::subresourceRange::baseArrayLayer. */ uint32_t -radv_meta_get_iview_layer(const struct radv_image *dst_image, - const VkImageSubresourceLayers *dst_subresource, +radv_meta_get_iview_layer(const struct radv_image *dst_image, const VkImageSubresourceLayers *dst_subresource, const VkOffset3D *dst_offset) { switch (dst_image->vk.image_type) { @@ -266,7 +261,7 @@ radv_meta_get_iview_layer(const struct radv_image *dst_image, } } -static VKAPI_ATTR void * VKAPI_CALL +static VKAPI_ATTR void *VKAPI_CALL meta_alloc(void *_device, size_t size, size_t alignment, VkSystemAllocationScope allocationScope) { struct radv_device *device = _device; @@ -274,9 +269,8 @@ meta_alloc(void *_device, size_t size, size_t alignment, VkSystemAllocationScope VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); } -static VKAPI_ATTR void * VKAPI_CALL -meta_realloc(void *_device, void *original, size_t size, size_t alignment, - VkSystemAllocationScope allocationScope) +static VKAPI_ATTR void *VKAPI_CALL +meta_realloc(void *_device, void *original, size_t size, size_t alignment, VkSystemAllocationScope allocationScope) { struct radv_device *device = _device; return device->vk.alloc.pfnReallocation(device->vk.alloc.pUserData, original, size, alignment, @@ -364,8 +358,7 @@ radv_load_meta_pipeline(struct radv_device *device) create_info.pInitialData = data; fail: - result = vk_common_CreatePipelineCache(radv_device_to_handle(device), &create_info, NULL, - &device->meta_state.cache); + result = vk_common_CreatePipelineCache(radv_device_to_handle(device), &create_info, NULL, &device->meta_state.cache); if (result == VK_SUCCESS) { device->meta_state.initial_cache_entries = num_cache_entries(device->meta_state.cache); ret = device->meta_state.initial_cache_entries > 0; @@ -393,8 +386,7 @@ radv_store_meta_pipeline(struct radv_device *device) if 
(num_cache_entries(device->meta_state.cache) <= device->meta_state.initial_cache_entries) return; - if (vk_common_GetPipelineCacheData(radv_device_to_handle(device), device->meta_state.cache, - &size, NULL)) + if (vk_common_GetPipelineCacheData(radv_device_to_handle(device), device->meta_state.cache, &size, NULL)) return; if (!radv_builtin_cache_path(path)) @@ -409,8 +401,7 @@ radv_store_meta_pipeline(struct radv_device *device) if (!data) goto fail; - if (vk_common_GetPipelineCacheData(radv_device_to_handle(device), device->meta_state.cache, - &size, data)) + if (vk_common_GetPipelineCacheData(radv_device_to_handle(device), device->meta_state.cache, &size, data)) goto fail; if (write(fd, data, size) == -1) goto fail; @@ -640,9 +631,8 @@ radv_meta_build_nir_fs_noop(struct radv_device *dev) } void -radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b, bool is_integer, - int samples, nir_variable *input_img, nir_variable *color, - nir_ssa_def *img_coord) +radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b, bool is_integer, int samples, + nir_variable *input_img, nir_variable *color, nir_ssa_def *img_coord) { nir_deref_instr *input_img_deref = nir_build_deref_var(b, input_img); nir_ssa_def *sample0 = nir_txf_ms_deref(b, input_img_deref, img_coord, nir_imm_int(b, 0)); @@ -676,8 +666,7 @@ radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b, nir_ssa_def * radv_meta_load_descriptor(nir_builder *b, unsigned desc_set, unsigned binding) { - nir_ssa_def *rsrc = nir_vulkan_resource_index(b, 3, 32, nir_imm_int(b, 0), .desc_set = desc_set, - .binding = binding); + nir_ssa_def *rsrc = nir_vulkan_resource_index(b, 3, 32, nir_imm_int(b, 0), .desc_set = desc_set, .binding = binding); return nir_trim_vector(b, rsrc, 2); } @@ -688,11 +677,11 @@ get_global_ids(nir_builder *b, unsigned num_components) nir_ssa_def *local_ids = nir_channels(b, nir_load_local_invocation_id(b), mask); nir_ssa_def *block_ids = nir_channels(b, nir_load_workgroup_id(b, 32), mask); - nir_ssa_def *block_size = nir_channels( - b, - nir_imm_ivec4(b, b->shader->info.workgroup_size[0], b->shader->info.workgroup_size[1], - b->shader->info.workgroup_size[2], 0), - mask); + nir_ssa_def *block_size = + nir_channels(b, + nir_imm_ivec4(b, b->shader->info.workgroup_size[0], b->shader->info.workgroup_size[1], + b->shader->info.workgroup_size[2], 0), + mask); return nir_iadd(b, nir_imul(b, block_ids, block_size), local_ids); } diff --git a/src/amd/vulkan/meta/radv_meta.h b/src/amd/vulkan/meta/radv_meta.h index 02531c4..da38b04 100644 --- a/src/amd/vulkan/meta/radv_meta.h +++ b/src/amd/vulkan/meta/radv_meta.h @@ -112,16 +112,13 @@ void radv_device_finish_meta_etc_decode_state(struct radv_device *device); VkResult radv_device_init_dgc_prepare_state(struct radv_device *device); void radv_device_finish_dgc_prepare_state(struct radv_device *device); -void radv_meta_save(struct radv_meta_saved_state *saved_state, struct radv_cmd_buffer *cmd_buffer, - uint32_t flags); +void radv_meta_save(struct radv_meta_saved_state *saved_state, struct radv_cmd_buffer *cmd_buffer, uint32_t flags); -void radv_meta_restore(const struct radv_meta_saved_state *state, - struct radv_cmd_buffer *cmd_buffer); +void radv_meta_restore(const struct radv_meta_saved_state *state, struct radv_cmd_buffer *cmd_buffer); VkImageViewType radv_meta_get_view_type(const struct radv_image *image); -uint32_t radv_meta_get_iview_layer(const struct radv_image *dst_image, - const VkImageSubresourceLayers 
*dst_subresource, +uint32_t radv_meta_get_iview_layer(const struct radv_image *dst_image, const VkImageSubresourceLayers *dst_subresource, const VkOffset3D *dst_offset); struct radv_meta_blit2d_surf { @@ -154,24 +151,21 @@ struct radv_meta_blit2d_rect { void radv_meta_begin_blit2d(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_saved_state *save); void radv_meta_blit2d(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src_img, - struct radv_meta_blit2d_buffer *src_buf, struct radv_meta_blit2d_surf *dst, - unsigned num_rects, struct radv_meta_blit2d_rect *rects); + struct radv_meta_blit2d_buffer *src_buf, struct radv_meta_blit2d_surf *dst, unsigned num_rects, + struct radv_meta_blit2d_rect *rects); void radv_meta_end_blit2d(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_saved_state *save); VkResult radv_device_init_meta_bufimage_state(struct radv_device *device); void radv_device_finish_meta_bufimage_state(struct radv_device *device); -void radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_blit2d_surf *src, +void radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src, struct radv_meta_blit2d_buffer *dst, unsigned num_rects, struct radv_meta_blit2d_rect *rects); -void radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_blit2d_buffer *src, +void radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_buffer *src, struct radv_meta_blit2d_surf *dst, unsigned num_rects, struct radv_meta_blit2d_rect *rects); -void radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_blit2d_surf *src, +void radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src, struct radv_meta_blit2d_surf *dst, unsigned num_rects, struct radv_meta_blit2d_rect *rects); void radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst, @@ -183,35 +177,30 @@ void radv_expand_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_i void radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *subresourceRange, struct radv_sample_locations_state *sample_locs); -void radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, +void radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *subresourceRange); void radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *subresourceRange); void radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image); void radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *subresourceRange); -void radv_copy_vrs_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *vrs_image, - const VkRect2D *rect, struct radv_image *dst_image, - struct radv_buffer *htile_buffer, bool read_htile_value); +void radv_copy_vrs_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *vrs_image, const VkRect2D *rect, + struct radv_image *dst_image, struct radv_buffer *htile_buffer, bool read_htile_value); -bool radv_can_use_fmask_copy(struct radv_cmd_buffer *cmd_buffer, - const struct radv_image *src_image, const struct radv_image *dst_image, - unsigned num_rects, const struct radv_meta_blit2d_rect *rects); +bool 
radv_can_use_fmask_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image, + const struct radv_image *dst_image, unsigned num_rects, + const struct radv_meta_blit2d_rect *rects); void radv_fmask_copy(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src, struct radv_meta_blit2d_surf *dst); -void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *src_image, VkFormat src_format, - VkImageLayout src_image_layout, struct radv_image *dst_image, +void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, + VkFormat src_format, VkImageLayout src_image_layout, struct radv_image *dst_image, VkFormat dst_format, VkImageLayout dst_image_layout, const VkImageResolve2 *region); -void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *src_image, VkImageLayout src_image_layout, - struct radv_image *dst_image, - VkImageLayout dst_image_layout, - const VkImageResolve2 *region); +void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, + VkImageLayout src_image_layout, struct radv_image *dst_image, + VkImageLayout dst_image_layout, const VkImageResolve2 *region); void radv_decompress_resolve_rendering_src(struct radv_cmd_buffer *cmd_buffer); @@ -227,12 +216,10 @@ uint32_t radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *i uint32_t radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, const VkImageSubresourceRange *range, uint32_t value); -void radv_update_buffer_cp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, const void *data, - uint64_t size); +void radv_update_buffer_cp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, const void *data, uint64_t size); -void radv_meta_decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, - VkImageLayout layout, const VkImageSubresourceLayers *subresource, - VkOffset3D offset, VkExtent3D extent); +void radv_meta_decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout layout, + const VkImageSubresourceLayers *subresource, VkOffset3D offset, VkExtent3D extent); /** * Return whether the bound pipeline is the FMASK decompress pass. 
@@ -260,8 +247,8 @@ radv_is_dcc_decompress_pipeline(struct radv_cmd_buffer *cmd_buffer) return false; return pipeline->custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX8 || - (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11 && - pipeline->custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX11); + (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11 && + pipeline->custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX11); } /* common nir builder helpers */ @@ -273,9 +260,8 @@ nir_builder PRINTFLIKE(3, 4) nir_shader *radv_meta_build_nir_vs_generate_vertices(struct radv_device *dev); nir_shader *radv_meta_build_nir_fs_noop(struct radv_device *dev); -void radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b, bool is_integer, - int samples, nir_variable *input_img, nir_variable *color, - nir_ssa_def *img_coord); +void radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b, bool is_integer, int samples, + nir_variable *input_img, nir_variable *color, nir_ssa_def *img_coord); nir_ssa_def *radv_meta_load_descriptor(nir_builder *b, unsigned desc_set, unsigned binding); diff --git a/src/amd/vulkan/meta/radv_meta_blit.c b/src/amd/vulkan/meta/radv_meta_blit.c index 908c0a2..cd76632 100644 --- a/src/amd/vulkan/meta/radv_meta_blit.c +++ b/src/amd/vulkan/meta/radv_meta_blit.c @@ -31,9 +31,8 @@ struct blit_region { VkExtent3D dst_extent; }; -static VkResult build_pipeline(struct radv_device *device, VkImageAspectFlagBits aspect, - enum glsl_sampler_dim tex_dim, VkFormat format, - VkPipeline *pipeline); +static VkResult build_pipeline(struct radv_device *device, VkImageAspectFlagBits aspect, enum glsl_sampler_dim tex_dim, + VkFormat format, VkPipeline *pipeline); static nir_shader * build_nir_vertex_shader(struct radv_device *dev) @@ -53,8 +52,7 @@ build_nir_vertex_shader(struct radv_device *dev) nir_store_var(&b, pos_out, outvec, 0xf); nir_ssa_def *src_box = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16); - nir_ssa_def *src0_z = - nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4); + nir_ssa_def *src0_z = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4); nir_ssa_def *vertex_id = nir_load_vertex_id_zero_base(&b); @@ -114,8 +112,7 @@ static nir_shader * build_nir_copy_fragment_shader_depth(struct radv_device *dev, enum glsl_sampler_dim tex_dim) { const struct glsl_type *vec4 = glsl_vec4_type(); - nir_builder b = - radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_blit_depth_fs.%d", tex_dim); + nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_blit_depth_fs.%d", tex_dim); nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec4, "v_tex_pos"); tex_pos_in->data.location = VARYING_SLOT_VAR0; @@ -147,8 +144,7 @@ static nir_shader * build_nir_copy_fragment_shader_stencil(struct radv_device *dev, enum glsl_sampler_dim tex_dim) { const struct glsl_type *vec4 = glsl_vec4_type(); - nir_builder b = - radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_blit_stencil_fs.%d", tex_dim); + nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_blit_stencil_fs.%d", tex_dim); nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec4, "v_tex_pos"); tex_pos_in->data.location = VARYING_SLOT_VAR0; @@ -192,10 +188,9 @@ translate_sampler_dim(VkImageType type) } static void -meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, - struct radv_image_view *src_iview, 
VkImageLayout src_image_layout, - float src_offset_0[3], float src_offset_1[3], struct radv_image *dst_image, - struct radv_image_view *dst_iview, VkImageLayout dst_image_layout, +meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, struct radv_image_view *src_iview, + VkImageLayout src_image_layout, float src_offset_0[3], float src_offset_1[3], + struct radv_image *dst_image, struct radv_image_view *dst_iview, VkImageLayout dst_image_layout, VkRect2D dst_box, VkSampler sampler) { struct radv_device *device = cmd_buffer->device; @@ -208,14 +203,12 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, assert(src_image->vk.samples == dst_image->vk.samples); float vertex_push_constants[5] = { - src_offset_0[0] / (float)src_width, src_offset_0[1] / (float)src_height, - src_offset_1[0] / (float)src_width, src_offset_1[1] / (float)src_height, - src_offset_0[2] / (float)src_depth, + src_offset_0[0] / (float)src_width, src_offset_0[1] / (float)src_height, src_offset_1[0] / (float)src_width, + src_offset_1[1] / (float)src_height, src_offset_0[2] / (float)src_depth, }; - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.blit.pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, 20, - vertex_push_constants); + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.blit.pipeline_layout, + VK_SHADER_STAGE_VERTEX_BIT, 0, 20, vertex_push_constants); VkPipeline *pipeline = NULL; unsigned fs_key = 0; @@ -282,8 +275,7 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, } if (!*pipeline) { - VkResult ret = build_pipeline(device, src_iview->vk.aspects, - translate_sampler_dim(src_image->vk.image_type), + VkResult ret = build_pipeline(device, src_iview->vk.aspects, translate_sampler_dim(src_image->vk.image_type), format, pipeline); if (ret != VK_SUCCESS) { vk_command_buffer_set_error(&cmd_buffer->vk, ret); @@ -291,32 +283,31 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, } } - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, - *pipeline); - - radv_meta_push_descriptor_set( - cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, device->meta_state.blit.pipeline_layout, - 0, /* set */ - 1, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .pImageInfo = (VkDescriptorImageInfo[]){ - { - .sampler = sampler, - .imageView = radv_image_view_to_handle(src_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - }}}); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + + radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, device->meta_state.blit.pipeline_layout, + 0, /* set */ + 1, /* descriptorWriteCount */ + (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = (VkDescriptorImageInfo[]){ + { + .sampler = sampler, + .imageView = radv_image_view_to_handle(src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + }}}); VkRenderingInfo rendering_info = { .sType = VK_STRUCTURE_TYPE_RENDERING_INFO, - .renderArea = { - .offset = { 0, 0 }, - .extent = { dst_width, dst_height }, - }, + 
.renderArea = + { + .offset = {0, 0}, + .extent = {dst_width, dst_height}, + }, .layerCount = 1, }; @@ -325,7 +316,7 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, unsigned dst_layout = radv_meta_dst_layout_from_layout(dst_image_layout); VkImageLayout layout = radv_meta_dst_layout_to_layout(dst_layout); - color_att = (VkRenderingAttachmentInfo) { + color_att = (VkRenderingAttachmentInfo){ .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, .imageView = radv_image_view_to_handle(dst_iview), .imageLayout = layout, @@ -341,7 +332,7 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst_image_layout); VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout); - depth_att = (VkRenderingAttachmentInfo) { + depth_att = (VkRenderingAttachmentInfo){ .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, .imageView = radv_image_view_to_handle(dst_iview), .imageLayout = layout, @@ -356,7 +347,7 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst_image_layout); VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout); - stencil_att = (VkRenderingAttachmentInfo) { + stencil_att = (VkRenderingAttachmentInfo){ .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, .imageView = radv_image_view_to_handle(dst_iview), .imageLayout = layout, @@ -394,9 +385,8 @@ flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1) } static void -blit_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, - VkImageLayout src_image_layout, struct radv_image *dst_image, - VkImageLayout dst_image_layout, const VkImageBlit2 *region, VkFilter filter) +blit_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkImageLayout src_image_layout, + struct radv_image *dst_image, VkImageLayout dst_image_layout, const VkImageBlit2 *region, VkFilter filter) { const VkImageSubresourceLayers *src_res = &region->srcSubresource; const VkImageSubresourceLayers *dst_res = &region->dstSubresource; @@ -427,8 +417,8 @@ blit_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, * affected by conditional rendering.
*/ radv_meta_save(&saved_state, cmd_buffer, - RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | - RADV_META_SAVE_DESCRIPTORS | RADV_META_SUSPEND_PREDICATING); + RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS | + RADV_META_SUSPEND_PREDICATING); unsigned dst_start, dst_end; if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) { @@ -498,13 +488,12 @@ blit_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, .minDepth = 0.0f, .maxDepth = 1.0f}); - radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, - &(VkRect2D){ - .offset = (VkOffset2D){MIN2(dst_offset_0.x, dst_offset_1.x), - MIN2(dst_offset_0.y, dst_offset_1.y)}, - .extent = (VkExtent2D){abs(dst_offset_1.x - dst_offset_0.x), - abs(dst_offset_1.y - dst_offset_0.y)}, - }); + radv_CmdSetScissor( + radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, + &(VkRect2D){ + .offset = (VkOffset2D){MIN2(dst_offset_0.x, dst_offset_1.x), MIN2(dst_offset_0.y, dst_offset_1.y)}, + .extent = (VkExtent2D){abs(dst_offset_1.x - dst_offset_0.x), abs(dst_offset_1.y - dst_offset_0.y)}, + }); const unsigned num_layers = dst_end - dst_start; for (unsigned i = 0; i < num_layers; i++) { @@ -551,8 +540,8 @@ blit_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, .layerCount = 1}, }, 0, NULL); - meta_emit_blit(cmd_buffer, src_image, &src_iview, src_image_layout, src_offset_0, - src_offset_1, dst_image, &dst_iview, dst_image_layout, dst_box, sampler); + meta_emit_blit(cmd_buffer, src_image, &src_iview, src_image_layout, src_offset_0, src_offset_1, dst_image, + &dst_iview, dst_image_layout, dst_box, sampler); radv_image_view_finish(&dst_iview); radv_image_view_finish(&src_iview); @@ -571,9 +560,8 @@ radv_CmdBlitImage2(VkCommandBuffer commandBuffer, const VkBlitImageInfo2 *pBlitI RADV_FROM_HANDLE(radv_image, dst_image, pBlitImageInfo->dstImage); for (unsigned r = 0; r < pBlitImageInfo->regionCount; r++) { - blit_image(cmd_buffer, src_image, pBlitImageInfo->srcImageLayout, dst_image, - pBlitImageInfo->dstImageLayout, &pBlitImageInfo->pRegions[r], - pBlitImageInfo->filter); + blit_image(cmd_buffer, src_image, pBlitImageInfo->srcImageLayout, dst_image, pBlitImageInfo->dstImageLayout, + &pBlitImageInfo->pRegions[r], pBlitImageInfo->filter); } } @@ -583,37 +571,27 @@ radv_device_finish_meta_blit_state(struct radv_device *device) struct radv_meta_state *state = &device->meta_state; for (unsigned i = 0; i < NUM_META_FS_KEYS; ++i) { - radv_DestroyPipeline(radv_device_to_handle(device), state->blit.pipeline_1d_src[i], - &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), state->blit.pipeline_2d_src[i], - &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), state->blit.pipeline_3d_src[i], - &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->blit.pipeline_1d_src[i], &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->blit.pipeline_2d_src[i], &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->blit.pipeline_3d_src[i], &state->alloc); } - radv_DestroyPipeline(radv_device_to_handle(device), state->blit.depth_only_1d_pipeline, - &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), state->blit.depth_only_2d_pipeline, - &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), state->blit.depth_only_3d_pipeline, - &state->alloc); - - radv_DestroyPipeline(radv_device_to_handle(device), state->blit.stencil_only_1d_pipeline, - &state->alloc); - 
radv_DestroyPipeline(radv_device_to_handle(device), state->blit.stencil_only_2d_pipeline, - &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), state->blit.stencil_only_3d_pipeline, - &state->alloc); - - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->blit.pipeline_layout, - &state->alloc); - device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), - state->blit.ds_layout, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->blit.depth_only_1d_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->blit.depth_only_2d_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->blit.depth_only_3d_pipeline, &state->alloc); + + radv_DestroyPipeline(radv_device_to_handle(device), state->blit.stencil_only_1d_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->blit.stencil_only_2d_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->blit.stencil_only_3d_pipeline, &state->alloc); + + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->blit.pipeline_layout, &state->alloc); + device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->blit.ds_layout, + &state->alloc); } static VkResult -build_pipeline(struct radv_device *device, VkImageAspectFlagBits aspect, - enum glsl_sampler_dim tex_dim, VkFormat format, VkPipeline *pipeline) +build_pipeline(struct radv_device *device, VkImageAspectFlagBits aspect, enum glsl_sampler_dim tex_dim, VkFormat format, + VkPipeline *pipeline) { VkResult result = VK_SUCCESS; @@ -686,13 +664,12 @@ build_pipeline(struct radv_device *device, VkImageAspectFlagBits aspect, .scissorCount = 1, }, .pRasterizationState = - &(VkPipelineRasterizationStateCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .rasterizerDiscardEnable = false, - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, - .lineWidth = 1.0f}, + &(VkPipelineRasterizationStateCreateInfo){.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, + .lineWidth = 1.0f}, .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo){ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, @@ -719,11 +696,12 @@ build_pipeline(struct radv_device *device, VkImageAspectFlagBits aspect, VkPipelineColorBlendStateCreateInfo color_blend_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .attachmentCount = 1, - .pAttachments = (VkPipelineColorBlendAttachmentState[]){ - {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT | - VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT}, - }, - .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f }}; + .pAttachments = + (VkPipelineColorBlendAttachmentState[]){ + {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT}, + }, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}}; VkPipelineDepthStencilStateCreateInfo depth_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, @@ -770,9 +748,8 @@ build_pipeline(struct radv_device *device, VkImageAspectFlagBits aspect, const struct radv_graphics_pipeline_create_info radv_pipeline_info = 
{.use_rectlist = true}; - result = radv_graphics_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &vk_pipeline_info, &radv_pipeline_info, - &device->meta_state.alloc, pipeline); + result = radv_graphics_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, + &radv_pipeline_info, &device->meta_state.alloc, pipeline); ralloc_free(vs); ralloc_free(fs); mtx_unlock(&device->meta_state.mtx); @@ -820,20 +797,17 @@ radv_device_init_meta_blit_depth(struct radv_device *device, bool on_demand) if (on_demand) return VK_SUCCESS; - result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_1D, - VK_FORMAT_D32_SFLOAT, + result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_1D, VK_FORMAT_D32_SFLOAT, &device->meta_state.blit.depth_only_1d_pipeline); if (result != VK_SUCCESS) goto fail; - result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_2D, - VK_FORMAT_D32_SFLOAT, + result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_2D, VK_FORMAT_D32_SFLOAT, &device->meta_state.blit.depth_only_2d_pipeline); if (result != VK_SUCCESS) goto fail; - result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_3D, - VK_FORMAT_D32_SFLOAT, + result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_3D, VK_FORMAT_D32_SFLOAT, &device->meta_state.blit.depth_only_3d_pipeline); if (result != VK_SUCCESS) goto fail; @@ -850,20 +824,17 @@ radv_device_init_meta_blit_stencil(struct radv_device *device, bool on_demand) if (on_demand) return VK_SUCCESS; - result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_1D, - VK_FORMAT_S8_UINT, + result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_1D, VK_FORMAT_S8_UINT, &device->meta_state.blit.stencil_only_1d_pipeline); if (result != VK_SUCCESS) goto fail; - result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_2D, - VK_FORMAT_S8_UINT, + result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_2D, VK_FORMAT_S8_UINT, &device->meta_state.blit.stencil_only_2d_pipeline); if (result != VK_SUCCESS) goto fail; - result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_3D, - VK_FORMAT_S8_UINT, + result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_3D, VK_FORMAT_S8_UINT, &device->meta_state.blit.stencil_only_3d_pipeline); if (result != VK_SUCCESS) goto fail; @@ -877,20 +848,18 @@ radv_device_init_meta_blit_state(struct radv_device *device, bool on_demand) { VkResult result; - VkDescriptorSetLayoutCreateInfo ds_layout_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 1, - .pBindings = (VkDescriptorSetLayoutBinding[]){ - {.binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .pImmutableSamplers = NULL}, - }}; - result = - radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info, - &device->meta_state.alloc, &device->meta_state.blit.ds_layout); + VkDescriptorSetLayoutCreateInfo ds_layout_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]){ + {.binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + 
.descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL}, + }}; + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info, &device->meta_state.alloc, + &device->meta_state.blit.ds_layout); if (result != VK_SUCCESS) return result; @@ -904,8 +873,7 @@ radv_device_init_meta_blit_state(struct radv_device *device, bool on_demand) .pushConstantRangeCount = 1, .pPushConstantRanges = &push_constant_range, }, - &device->meta_state.alloc, - &device->meta_state.blit.pipeline_layout); + &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout); if (result != VK_SUCCESS) return result; diff --git a/src/amd/vulkan/meta/radv_meta_blit2d.c b/src/amd/vulkan/meta/radv_meta_blit2d.c index ea1a92f9..209902f 100644 --- a/src/amd/vulkan/meta/radv_meta_blit2d.c +++ b/src/amd/vulkan/meta/radv_meta_blit2d.c @@ -35,21 +35,18 @@ enum blit2d_src_type { BLIT2D_NUM_SRC_TYPES, }; -static VkResult blit2d_init_color_pipeline(struct radv_device *device, - enum blit2d_src_type src_type, VkFormat format, +static VkResult blit2d_init_color_pipeline(struct radv_device *device, enum blit2d_src_type src_type, VkFormat format, uint32_t log2_samples); -static VkResult blit2d_init_depth_only_pipeline(struct radv_device *device, - enum blit2d_src_type src_type, +static VkResult blit2d_init_depth_only_pipeline(struct radv_device *device, enum blit2d_src_type src_type, uint32_t log2_samples); -static VkResult blit2d_init_stencil_only_pipeline(struct radv_device *device, - enum blit2d_src_type src_type, +static VkResult blit2d_init_stencil_only_pipeline(struct radv_device *device, enum blit2d_src_type src_type, uint32_t log2_samples); static void -create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf, - struct radv_image_view *iview, VkFormat depth_format, VkImageAspectFlagBits aspects) +create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf, struct radv_image_view *iview, + VkFormat depth_format, VkImageAspectFlagBits aspects) { VkFormat format; @@ -70,14 +67,12 @@ create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *s .baseArrayLayer = surf->layer, .layerCount = 1}, }, - 0, &(struct radv_image_view_extra_create_info){ - .disable_dcc_mrt = surf->disable_compression - }); + 0, &(struct radv_image_view_extra_create_info){.disable_dcc_mrt = surf->disable_compression}); } static void -create_bview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_buffer *src, - struct radv_buffer_view *bview, VkFormat depth_format) +create_bview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_buffer *src, struct radv_buffer_view *bview, + VkFormat depth_format) { VkFormat format; @@ -103,9 +98,8 @@ struct blit2d_src_temps { static void blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src_img, - struct radv_meta_blit2d_buffer *src_buf, struct blit2d_src_temps *tmp, - enum blit2d_src_type src_type, VkFormat depth_format, VkImageAspectFlagBits aspects, - uint32_t log2_samples) + struct radv_meta_blit2d_buffer *src_buf, struct blit2d_src_temps *tmp, enum blit2d_src_type src_type, + VkFormat depth_format, VkImageAspectFlagBits aspects, uint32_t log2_samples) { struct radv_device *device = cmd_buffer->device; @@ -113,20 +107,19 @@ blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf create_bview(cmd_buffer, src_buf, &tmp->bview, depth_format); radv_meta_push_descriptor_set( - cmd_buffer, 
VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit2d[log2_samples].p_layouts[src_type], 0, /* set */ - 1, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]){ - {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, - .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(&tmp->bview)}}}); + cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, device->meta_state.blit2d[log2_samples].p_layouts[src_type], + 0, /* set */ + 1, /* descriptorWriteCount */ + (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(&tmp->bview)}}}); radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.blit2d[log2_samples].p_layouts[src_type], - VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4, &src_buf->pitch); + device->meta_state.blit2d[log2_samples].p_layouts[src_type], VK_SHADER_STAGE_FRAGMENT_BIT, + 16, 4, &src_buf->pitch); } else { create_iview(cmd_buffer, src_img, &tmp->iview, depth_format, aspects); @@ -135,22 +128,21 @@ blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf device->meta_state.blit2d[log2_samples].p_layouts[src_type], VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4, &src_img->layer); - radv_meta_push_descriptor_set( - cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit2d[log2_samples].p_layouts[src_type], 0, /* set */ - 1, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]){ - { - .sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(&tmp->iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - }}}); + radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit2d[log2_samples].p_layouts[src_type], 0, /* set */ + 1, /* descriptorWriteCount */ + (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]){ + { + .sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(&tmp->iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + }}}); } } @@ -161,44 +153,33 @@ struct blit2d_dst_temps { }; static void -bind_pipeline(struct radv_cmd_buffer *cmd_buffer, enum blit2d_src_type src_type, unsigned fs_key, - uint32_t log2_samples) +bind_pipeline(struct radv_cmd_buffer *cmd_buffer, enum blit2d_src_type src_type, unsigned fs_key, uint32_t log2_samples) { - VkPipeline pipeline = - cmd_buffer->device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key]; + VkPipeline pipeline = cmd_buffer->device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key]; - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); } static void -bind_depth_pipeline(struct radv_cmd_buffer *cmd_buffer, enum blit2d_src_type src_type, - uint32_t log2_samples) +bind_depth_pipeline(struct radv_cmd_buffer *cmd_buffer, enum 
blit2d_src_type src_type, uint32_t log2_samples) { - VkPipeline pipeline = - cmd_buffer->device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type]; + VkPipeline pipeline = cmd_buffer->device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type]; - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); } static void -bind_stencil_pipeline(struct radv_cmd_buffer *cmd_buffer, enum blit2d_src_type src_type, - uint32_t log2_samples) +bind_stencil_pipeline(struct radv_cmd_buffer *cmd_buffer, enum blit2d_src_type src_type, uint32_t log2_samples) { - VkPipeline pipeline = - cmd_buffer->device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type]; + VkPipeline pipeline = cmd_buffer->device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type]; - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); } static void -radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_blit2d_surf *src_img, - struct radv_meta_blit2d_buffer *src_buf, - struct radv_meta_blit2d_surf *dst, unsigned num_rects, - struct radv_meta_blit2d_rect *rects, enum blit2d_src_type src_type, +radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src_img, + struct radv_meta_blit2d_buffer *src_buf, struct radv_meta_blit2d_surf *dst, + unsigned num_rects, struct radv_meta_blit2d_rect *rects, enum blit2d_src_type src_type, uint32_t log2_samples) { struct radv_device *device = cmd_buffer->device; @@ -218,8 +199,7 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, .extent = (VkExtent2D){rects[r].width, rects[r].height}, }); - u_foreach_bit(i, dst->aspect_mask) - { + u_foreach_bit (i, dst->aspect_mask) { unsigned aspect_mask = 1u << i; unsigned src_aspect_mask = aspect_mask; VkFormat depth_format = 0; @@ -231,8 +211,8 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, src_aspect_mask = src_img->aspect_mask; struct blit2d_src_temps src_temps; - blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format, - src_aspect_mask, log2_samples); + blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format, src_aspect_mask, + log2_samples); struct blit2d_dst_temps dst_temps; create_iview(cmd_buffer, dst, &dst_temps.iview, depth_format, aspect_mask); @@ -245,19 +225,16 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, }; radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.blit2d[log2_samples].p_layouts[src_type], - VK_SHADER_STAGE_VERTEX_BIT, 0, 16, vertex_push_constants); + device->meta_state.blit2d[log2_samples].p_layouts[src_type], VK_SHADER_STAGE_VERTEX_BIT, + 0, 16, vertex_push_constants); - if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT || - aspect_mask == VK_IMAGE_ASPECT_PLANE_0_BIT || - aspect_mask == VK_IMAGE_ASPECT_PLANE_1_BIT || - aspect_mask == VK_IMAGE_ASPECT_PLANE_2_BIT) { + if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT || aspect_mask == VK_IMAGE_ASPECT_PLANE_0_BIT || + aspect_mask == VK_IMAGE_ASPECT_PLANE_1_BIT || aspect_mask == VK_IMAGE_ASPECT_PLANE_2_BIT) { unsigned fs_key = radv_format_meta_fs_key(device, dst_temps.iview.vk.format); - if 
(device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key] == - VK_NULL_HANDLE) { - VkResult ret = blit2d_init_color_pipeline( - device, src_type, radv_fs_key_format_exemplars[fs_key], log2_samples); + if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key] == VK_NULL_HANDLE) { + VkResult ret = + blit2d_init_color_pipeline(device, src_type, radv_fs_key_format_exemplars[fs_key], log2_samples); if (ret != VK_SUCCESS) { vk_command_buffer_set_error(&cmd_buffer->vk, ret); goto fail_pipeline; @@ -274,10 +251,11 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, const VkRenderingInfo rendering_info = { .sType = VK_STRUCTURE_TYPE_RENDERING_INFO, - .renderArea = { - .offset = { rects[r].dst_x, rects[r].dst_y }, - .extent = { rects[r].width, rects[r].height }, - }, + .renderArea = + { + .offset = {rects[r].dst_x, rects[r].dst_y}, + .extent = {rects[r].width, rects[r].height}, + }, .layerCount = 1, .colorAttachmentCount = 1, .pColorAttachments = &color_att_info, @@ -287,8 +265,7 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, bind_pipeline(cmd_buffer, src_type, fs_key, log2_samples); } else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) { - if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type] == - VK_NULL_HANDLE) { + if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type] == VK_NULL_HANDLE) { VkResult ret = blit2d_init_depth_only_pipeline(device, src_type, log2_samples); if (ret != VK_SUCCESS) { vk_command_buffer_set_error(&cmd_buffer->vk, ret); @@ -306,14 +283,14 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, const VkRenderingInfo rendering_info = { .sType = VK_STRUCTURE_TYPE_RENDERING_INFO, - .renderArea = { - .offset = { rects[r].dst_x, rects[r].dst_y }, - .extent = { rects[r].width, rects[r].height }, - }, + .renderArea = + { + .offset = {rects[r].dst_x, rects[r].dst_y}, + .extent = {rects[r].width, rects[r].height}, + }, .layerCount = 1, .pDepthAttachment = &depth_att_info, - .pStencilAttachment = (dst->image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? - &depth_att_info : NULL, + .pStencilAttachment = (dst->image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? &depth_att_info : NULL, }; radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info); @@ -321,8 +298,7 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, bind_depth_pipeline(cmd_buffer, src_type, log2_samples); } else if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) { - if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type] == - VK_NULL_HANDLE) { + if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type] == VK_NULL_HANDLE) { VkResult ret = blit2d_init_stencil_only_pipeline(device, src_type, log2_samples); if (ret != VK_SUCCESS) { vk_command_buffer_set_error(&cmd_buffer->vk, ret); @@ -340,13 +316,13 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, const VkRenderingInfo rendering_info = { .sType = VK_STRUCTURE_TYPE_RENDERING_INFO, - .renderArea = { - .offset = { rects[r].dst_x, rects[r].dst_y }, - .extent = { rects[r].width, rects[r].height }, - }, + .renderArea = + { + .offset = {rects[r].dst_x, rects[r].dst_y}, + .extent = {rects[r].width, rects[r].height}, + }, .layerCount = 1, - .pDepthAttachment = (dst->image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? - &stencil_att_info : NULL, + .pDepthAttachment = (dst->image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? 
&stencil_att_info : NULL, .pStencilAttachment = &stencil_att_info, }; @@ -374,8 +350,8 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, void radv_meta_blit2d(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src_img, - struct radv_meta_blit2d_buffer *src_buf, struct radv_meta_blit2d_surf *dst, - unsigned num_rects, struct radv_meta_blit2d_rect *rects) + struct radv_meta_blit2d_buffer *src_buf, struct radv_meta_blit2d_surf *dst, unsigned num_rects, + struct radv_meta_blit2d_rect *rects) { bool use_3d = (src_img && src_img->image->vk.image_type == VK_IMAGE_TYPE_3D); enum blit2d_src_type src_type = src_buf ? BLIT2D_SRC_TYPE_BUFFER @@ -423,12 +399,11 @@ build_nir_vertex_shader(struct radv_device *device) return b.shader; } -typedef nir_ssa_def *(*texel_fetch_build_func)(struct nir_builder *, struct radv_device *, - nir_ssa_def *, bool, bool); +typedef nir_ssa_def *(*texel_fetch_build_func)(struct nir_builder *, struct radv_device *, nir_ssa_def *, bool, bool); static nir_ssa_def * -build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa_def *tex_pos, - bool is_3d, bool is_multisampled) +build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa_def *tex_pos, bool is_3d, + bool is_multisampled) { enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : is_multisampled ? GLSL_SAMPLER_DIM_MS @@ -441,8 +416,7 @@ build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa nir_ssa_def *tex_pos_3d = NULL; nir_ssa_def *sample_idx = NULL; if (is_3d) { - nir_ssa_def *layer = - nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4); + nir_ssa_def *layer = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4); nir_ssa_def *chans[3]; chans[0] = nir_channel(b, tex_pos, 0); @@ -464,11 +438,10 @@ build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa } static nir_ssa_def * -build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa_def *tex_pos, - bool is_3d, bool is_multisampled) +build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa_def *tex_pos, bool is_3d, + bool is_multisampled) { - const struct glsl_type *sampler_type = - glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT); + const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT); nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex"); sampler->data.descriptor_set = 0; sampler->data.binding = 0; @@ -491,8 +464,8 @@ static const VkPipelineVertexInputStateCreateInfo normal_vi_create_info = { }; static nir_shader * -build_nir_copy_fragment_shader(struct radv_device *device, texel_fetch_build_func txf_func, - const char *name, bool is_3d, bool is_multisampled) +build_nir_copy_fragment_shader(struct radv_device *device, texel_fetch_build_func txf_func, const char *name, + bool is_3d, bool is_multisampled) { const struct glsl_type *vec4 = glsl_vec4_type(); const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2); @@ -516,8 +489,8 @@ build_nir_copy_fragment_shader(struct radv_device *device, texel_fetch_build_fun } static nir_shader * -build_nir_copy_fragment_shader_depth(struct radv_device *device, texel_fetch_build_func txf_func, - const char *name, bool is_3d, bool is_multisampled) +build_nir_copy_fragment_shader_depth(struct radv_device *device, texel_fetch_build_func txf_func, const char *name, + bool is_3d, bool 
is_multisampled) { const struct glsl_type *vec4 = glsl_vec4_type(); const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2); @@ -541,8 +514,8 @@ build_nir_copy_fragment_shader_depth(struct radv_device *device, texel_fetch_bui } static nir_shader * -build_nir_copy_fragment_shader_stencil(struct radv_device *device, texel_fetch_build_func txf_func, - const char *name, bool is_3d, bool is_multisampled) +build_nir_copy_fragment_shader_stencil(struct radv_device *device, texel_fetch_build_func txf_func, const char *name, + bool is_3d, bool is_multisampled) { const struct glsl_type *vec4 = glsl_vec4_type(); const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2); @@ -572,29 +545,27 @@ radv_device_finish_meta_blit2d_state(struct radv_device *device) for (unsigned log2_samples = 0; log2_samples < MAX_SAMPLES_LOG2; ++log2_samples) { for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) { - radv_DestroyPipelineLayout(radv_device_to_handle(device), - state->blit2d[log2_samples].p_layouts[src], &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->blit2d[log2_samples].p_layouts[src], + &state->alloc); device->vk.dispatch_table.DestroyDescriptorSetLayout( - radv_device_to_handle(device), state->blit2d[log2_samples].ds_layouts[src], - &state->alloc); + radv_device_to_handle(device), state->blit2d[log2_samples].ds_layouts[src], &state->alloc); for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) { - radv_DestroyPipeline(radv_device_to_handle(device), - state->blit2d[log2_samples].pipelines[src][j], &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->blit2d[log2_samples].pipelines[src][j], + &state->alloc); } - radv_DestroyPipeline(radv_device_to_handle(device), - state->blit2d[log2_samples].depth_only_pipeline[src], &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->blit2d[log2_samples].stencil_only_pipeline[src], + radv_DestroyPipeline(radv_device_to_handle(device), state->blit2d[log2_samples].depth_only_pipeline[src], + &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->blit2d[log2_samples].stencil_only_pipeline[src], &state->alloc); } } } static VkResult -blit2d_init_color_pipeline(struct radv_device *device, enum blit2d_src_type src_type, - VkFormat format, uint32_t log2_samples) +blit2d_init_color_pipeline(struct radv_device *device, enum blit2d_src_type src_type, VkFormat format, + uint32_t log2_samples) { VkResult result; unsigned fs_key = radv_format_meta_fs_key(device, format); @@ -626,8 +597,8 @@ blit2d_init_color_pipeline(struct radv_device *device, enum blit2d_src_type src_ } const VkPipelineVertexInputStateCreateInfo *vi_create_info; - nir_shader *fs = build_nir_copy_fragment_shader( - device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0); + nir_shader *fs = + build_nir_copy_fragment_shader(device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0); nir_shader *vs = build_nir_vertex_shader(device); vi_create_info = &normal_vi_create_info; @@ -670,16 +641,15 @@ blit2d_init_color_pipeline(struct radv_device *device, enum blit2d_src_type src_ .scissorCount = 1, }, .pRasterizationState = - &(VkPipelineRasterizationStateCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .rasterizerDiscardEnable = false, - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, - .depthBiasConstantFactor = 0.0f, - .depthBiasClamp = 0.0f, - 
.depthBiasSlopeFactor = 0.0f, - .lineWidth = 1.0f}, + &(VkPipelineRasterizationStateCreateInfo){.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f}, .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo){ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, @@ -694,10 +664,10 @@ blit2d_init_color_pipeline(struct radv_device *device, enum blit2d_src_type src_ .attachmentCount = 1, .pAttachments = (VkPipelineColorBlendAttachmentState[]){ - {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT | - VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT}, + {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT}, }, - .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f }}, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}}, .pDynamicState = &(VkPipelineDynamicStateCreateInfo){ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, @@ -716,10 +686,9 @@ blit2d_init_color_pipeline(struct radv_device *device, enum blit2d_src_type src_ const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true}; - result = radv_graphics_pipeline_create( - radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, - &radv_pipeline_info, &device->meta_state.alloc, - &device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key]); + result = radv_graphics_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, + &radv_pipeline_info, &device->meta_state.alloc, + &device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key]); ralloc_free(vs); ralloc_free(fs); @@ -729,8 +698,7 @@ blit2d_init_color_pipeline(struct radv_device *device, enum blit2d_src_type src_ } static VkResult -blit2d_init_depth_only_pipeline(struct radv_device *device, enum blit2d_src_type src_type, - uint32_t log2_samples) +blit2d_init_depth_only_pipeline(struct radv_device *device, enum blit2d_src_type src_type, uint32_t log2_samples) { VkResult result; const char *name; @@ -761,8 +729,8 @@ blit2d_init_depth_only_pipeline(struct radv_device *device, enum blit2d_src_type } const VkPipelineVertexInputStateCreateInfo *vi_create_info; - nir_shader *fs = build_nir_copy_fragment_shader_depth( - device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0); + nir_shader *fs = build_nir_copy_fragment_shader_depth(device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, + log2_samples > 0); nir_shader *vs = build_nir_vertex_shader(device); vi_create_info = &normal_vi_create_info; @@ -804,16 +772,15 @@ blit2d_init_depth_only_pipeline(struct radv_device *device, enum blit2d_src_type .scissorCount = 1, }, .pRasterizationState = - &(VkPipelineRasterizationStateCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .rasterizerDiscardEnable = false, - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, - .depthBiasConstantFactor = 0.0f, - .depthBiasClamp = 0.0f, - .depthBiasSlopeFactor = 0.0f, - .lineWidth = 1.0f}, + &(VkPipelineRasterizationStateCreateInfo){.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .rasterizerDiscardEnable = false, + 
.polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f}, .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo){ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, @@ -826,7 +793,7 @@ blit2d_init_depth_only_pipeline(struct radv_device *device, enum blit2d_src_type .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .attachmentCount = 0, .pAttachments = NULL, - .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f }, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, }, .pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo){ @@ -834,24 +801,26 @@ blit2d_init_depth_only_pipeline(struct radv_device *device, enum blit2d_src_type .depthTestEnable = true, .depthWriteEnable = true, .depthCompareOp = VK_COMPARE_OP_ALWAYS, - .front = { - .failOp = VK_STENCIL_OP_KEEP, - .passOp = VK_STENCIL_OP_KEEP, - .depthFailOp = VK_STENCIL_OP_KEEP, - .compareOp = VK_COMPARE_OP_NEVER, - .compareMask = UINT32_MAX, - .writeMask = UINT32_MAX, - .reference = 0u, - }, - .back = { - .failOp = VK_STENCIL_OP_KEEP, - .passOp = VK_STENCIL_OP_KEEP, - .depthFailOp = VK_STENCIL_OP_KEEP, - .compareOp = VK_COMPARE_OP_NEVER, - .compareMask = UINT32_MAX, - .writeMask = UINT32_MAX, - .reference = 0u, - }, + .front = + { + .failOp = VK_STENCIL_OP_KEEP, + .passOp = VK_STENCIL_OP_KEEP, + .depthFailOp = VK_STENCIL_OP_KEEP, + .compareOp = VK_COMPARE_OP_NEVER, + .compareMask = UINT32_MAX, + .writeMask = UINT32_MAX, + .reference = 0u, + }, + .back = + { + .failOp = VK_STENCIL_OP_KEEP, + .passOp = VK_STENCIL_OP_KEEP, + .depthFailOp = VK_STENCIL_OP_KEEP, + .compareOp = VK_COMPARE_OP_NEVER, + .compareMask = UINT32_MAX, + .writeMask = UINT32_MAX, + .reference = 0u, + }, .minDepthBounds = 0.0f, .maxDepthBounds = 1.0f, }, @@ -873,10 +842,9 @@ blit2d_init_depth_only_pipeline(struct radv_device *device, enum blit2d_src_type const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true}; - result = radv_graphics_pipeline_create( - radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, - &radv_pipeline_info, &device->meta_state.alloc, - &device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type]); + result = radv_graphics_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, + &radv_pipeline_info, &device->meta_state.alloc, + &device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type]); ralloc_free(vs); ralloc_free(fs); @@ -886,8 +854,7 @@ blit2d_init_depth_only_pipeline(struct radv_device *device, enum blit2d_src_type } static VkResult -blit2d_init_stencil_only_pipeline(struct radv_device *device, enum blit2d_src_type src_type, - uint32_t log2_samples) +blit2d_init_stencil_only_pipeline(struct radv_device *device, enum blit2d_src_type src_type, uint32_t log2_samples) { VkResult result; const char *name; @@ -918,8 +885,8 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device, enum blit2d_src_ty } const VkPipelineVertexInputStateCreateInfo *vi_create_info; - nir_shader *fs = build_nir_copy_fragment_shader_stencil( - device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0); + nir_shader *fs = build_nir_copy_fragment_shader_stencil(device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, + log2_samples > 0); nir_shader *vs = build_nir_vertex_shader(device); vi_create_info = &normal_vi_create_info; @@ 
-961,16 +928,15 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device, enum blit2d_src_ty .scissorCount = 1, }, .pRasterizationState = - &(VkPipelineRasterizationStateCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .rasterizerDiscardEnable = false, - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, - .depthBiasConstantFactor = 0.0f, - .depthBiasClamp = 0.0f, - .depthBiasSlopeFactor = 0.0f, - .lineWidth = 1.0f}, + &(VkPipelineRasterizationStateCreateInfo){.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f}, .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo){ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, @@ -983,7 +949,7 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device, enum blit2d_src_ty .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .attachmentCount = 0, .pAttachments = NULL, - .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f }, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, }, .pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo){ @@ -1027,10 +993,9 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device, enum blit2d_src_ty const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true}; - result = radv_graphics_pipeline_create( - radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, - &radv_pipeline_info, &device->meta_state.alloc, - &device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type]); + result = radv_graphics_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, + &radv_pipeline_info, &device->meta_state.alloc, + &device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type]); ralloc_free(vs); ralloc_free(fs); @@ -1043,9 +1008,8 @@ static VkResult meta_blit2d_create_pipe_layout(struct radv_device *device, int idx, uint32_t log2_samples) { VkResult result; - VkDescriptorType desc_type = (idx == BLIT2D_SRC_TYPE_BUFFER) - ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER - : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + VkDescriptorType desc_type = + (idx == BLIT2D_SRC_TYPE_BUFFER) ? 
VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; const VkPushConstantRange push_constant_ranges[] = { {VK_SHADER_STAGE_VERTEX_BIT, 0, 16}, {VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4}, @@ -1054,32 +1018,31 @@ meta_blit2d_create_pipe_layout(struct radv_device *device, int idx, uint32_t log result = radv_CreateDescriptorSetLayout( radv_device_to_handle(device), - &(VkDescriptorSetLayoutCreateInfo){ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 1, - .pBindings = - (VkDescriptorSetLayoutBinding[]){ - {.binding = 0, - .descriptorType = desc_type, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .pImmutableSamplers = NULL}, - }}, + &(VkDescriptorSetLayoutCreateInfo){.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 1, + .pBindings = + (VkDescriptorSetLayoutBinding[]){ + {.binding = 0, + .descriptorType = desc_type, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL}, + }}, &device->meta_state.alloc, &device->meta_state.blit2d[log2_samples].ds_layouts[idx]); if (result != VK_SUCCESS) goto fail; - result = radv_CreatePipelineLayout( - radv_device_to_handle(device), - &(VkPipelineLayoutCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .setLayoutCount = 1, - .pSetLayouts = &device->meta_state.blit2d[log2_samples].ds_layouts[idx], - .pushConstantRangeCount = num_push_constant_range, - .pPushConstantRanges = push_constant_ranges, - }, - &device->meta_state.alloc, &device->meta_state.blit2d[log2_samples].p_layouts[idx]); + result = + radv_CreatePipelineLayout(radv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.blit2d[log2_samples].ds_layouts[idx], + .pushConstantRangeCount = num_push_constant_range, + .pPushConstantRanges = push_constant_ranges, + }, + &device->meta_state.alloc, &device->meta_state.blit2d[log2_samples].p_layouts[idx]); if (result != VK_SUCCESS) goto fail; return VK_SUCCESS; @@ -1110,8 +1073,7 @@ radv_device_init_meta_blit2d_state(struct radv_device *device, bool on_demand) continue; for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) { - result = blit2d_init_color_pipeline(device, src, radv_fs_key_format_exemplars[j], - log2_samples); + result = blit2d_init_color_pipeline(device, src, radv_fs_key_format_exemplars[j], log2_samples); if (result != VK_SUCCESS) return result; } diff --git a/src/amd/vulkan/meta/radv_meta_buffer.c b/src/amd/vulkan/meta/radv_meta_buffer.c index b78d2fd..9bbd189 100644 --- a/src/amd/vulkan/meta/radv_meta_buffer.c +++ b/src/amd/vulkan/meta/radv_meta_buffer.c @@ -15,11 +15,9 @@ build_buffer_fill_shader(struct radv_device *dev) nir_ssa_def *size_minus16 = nir_channel(&b, pconst, 2); nir_ssa_def *data = nir_swizzle(&b, nir_channel(&b, pconst, 3), (unsigned[]){0, 0, 0, 0}, 4); - nir_ssa_def *global_id = - nir_iadd(&b, - nir_imul_imm(&b, nir_channel(&b, nir_load_workgroup_id(&b, 32), 0), - b.shader->info.workgroup_size[0]), - nir_load_local_invocation_index(&b)); + nir_ssa_def *global_id = nir_iadd( + &b, nir_imul_imm(&b, nir_channel(&b, nir_load_workgroup_id(&b, 32), 0), b.shader->info.workgroup_size[0]), + nir_load_local_invocation_index(&b)); nir_ssa_def *offset = nir_imin(&b, nir_imul_imm(&b, global_id, 16), size_minus16); 
    nir_ssa_def *dst_addr = nir_iadd(&b, buffer_addr, nir_u2u64(&b, offset));
@@ -35,21 +33,17 @@ build_buffer_copy_shader(struct radv_device *dev)
    b.shader->info.workgroup_size[0] = 64;
    nir_ssa_def *pconst = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
-   nir_ssa_def *size_minus16 =
-      nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
+   nir_ssa_def *size_minus16 = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
    nir_ssa_def *src_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b0011));
    nir_ssa_def *dst_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b1100));
-   nir_ssa_def *global_id =
-      nir_iadd(&b,
-               nir_imul_imm(&b, nir_channel(&b, nir_load_workgroup_id(&b, 32), 0),
-                            b.shader->info.workgroup_size[0]),
-               nir_load_local_invocation_index(&b));
+   nir_ssa_def *global_id = nir_iadd(
+      &b, nir_imul_imm(&b, nir_channel(&b, nir_load_workgroup_id(&b, 32), 0), b.shader->info.workgroup_size[0]),
+      nir_load_local_invocation_index(&b));
    nir_ssa_def *offset = nir_u2u64(&b, nir_imin(&b, nir_imul_imm(&b, global_id, 16), size_minus16));
-   nir_ssa_def *data =
-      nir_build_load_global(&b, 4, 32, nir_iadd(&b, src_addr, offset), .align_mul = 4);
+   nir_ssa_def *data = nir_build_load_global(&b, 4, 32, nir_iadd(&b, src_addr, offset), .align_mul = 4);
    nir_build_store_global(&b, data, nir_iadd(&b, dst_addr, offset), .align_mul = 4);
    return b.shader;
@@ -78,12 +72,10 @@ radv_device_init_meta_buffer_state(struct radv_device *device)
       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
       .setLayoutCount = 0,
       .pushConstantRangeCount = 1,
-      .pPushConstantRanges =
-         &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct fill_constants)},
+      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct fill_constants)},
    };
-   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &fill_pl_create_info,
-                                      &device->meta_state.alloc,
+   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &fill_pl_create_info, &device->meta_state.alloc,
                                       &device->meta_state.buffer.fill_p_layout);
    if (result != VK_SUCCESS)
       goto fail;
@@ -92,12 +84,10 @@ radv_device_init_meta_buffer_state(struct radv_device *device)
       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
       .setLayoutCount = 0,
       .pushConstantRangeCount = 1,
-      .pPushConstantRanges =
-         &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct copy_constants)},
+      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct copy_constants)},
    };
-   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &copy_pl_create_info,
-                                      &device->meta_state.alloc,
+   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &copy_pl_create_info, &device->meta_state.alloc,
                                       &device->meta_state.buffer.copy_p_layout);
    if (result != VK_SUCCESS)
       goto fail;
@@ -118,8 +108,7 @@ radv_device_init_meta_buffer_state(struct radv_device *device)
    };
    result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
-                                         &fill_vk_pipeline_info, NULL,
-                                         &device->meta_state.buffer.fill_pipeline);
+                                         &fill_vk_pipeline_info, NULL, &device->meta_state.buffer.fill_pipeline);
    if (result != VK_SUCCESS)
       goto fail;
@@ -139,8 +128,7 @@ radv_device_init_meta_buffer_state(struct radv_device *device)
    };
    result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
-                                         &copy_vk_pipeline_info, NULL,
-                                         &device->meta_state.buffer.copy_pipeline);
+                                         &copy_vk_pipeline_info, NULL, &device->meta_state.buffer.copy_pipeline);
    if (result != VK_SUCCESS)
       goto fail;
@@ -160,10 +148,8 @@ radv_device_finish_meta_buffer_state(struct radv_device *device)
    radv_DestroyPipeline(radv_device_to_handle(device), state->buffer.copy_pipeline, &state->alloc);
    radv_DestroyPipeline(radv_device_to_handle(device), state->buffer.fill_pipeline, &state->alloc);
-   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->buffer.copy_p_layout,
-                              &state->alloc);
-   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->buffer.fill_p_layout,
-                              &state->alloc);
+   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->buffer.copy_p_layout, &state->alloc);
+   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->buffer.fill_p_layout, &state->alloc);
 }
 static void
@@ -172,9 +158,8 @@ fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t siz
    struct radv_device *device = cmd_buffer->device;
    struct radv_meta_saved_state saved_state;
-   radv_meta_save(
-      &saved_state, cmd_buffer,
-      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+   radv_meta_save(&saved_state, cmd_buffer,
+                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                         device->meta_state.buffer.fill_pipeline);
@@ -187,9 +172,8 @@ fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t siz
       .data = data,
    };
-   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
-                         device->meta_state.buffer.fill_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
-                         sizeof(fill_consts), &fill_consts);
+   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.buffer.fill_p_layout,
+                         VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(fill_consts), &fill_consts);
    radv_unaligned_dispatch(cmd_buffer, DIV_ROUND_UP(size, 16), 1, 1);
@@ -197,15 +181,13 @@ fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t siz
 }
 static void
-copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dst_va,
-                   uint64_t size)
+copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dst_va, uint64_t size)
 {
    struct radv_device *device = cmd_buffer->device;
    struct radv_meta_saved_state saved_state;
-   radv_meta_save(
-      &saved_state, cmd_buffer,
-      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+   radv_meta_save(&saved_state, cmd_buffer,
+                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                         device->meta_state.buffer.copy_pipeline);
@@ -218,9 +200,8 @@ copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t
       .size_minus16 = size - 16,
    };
-   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
-                         device->meta_state.buffer.copy_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
-                         sizeof(copy_consts), &copy_consts);
+   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.buffer.copy_p_layout,
+                         VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(copy_consts), &copy_consts);
    radv_unaligned_dispatch(cmd_buffer, DIV_ROUND_UP(size, 16), 1, 1);
@@ -228,13 +209,12 @@ copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t
 }
 static bool
-radv_prefer_compute_dma(const struct radv_device *device, uint64_t size,
-                        struct radeon_winsys_bo *src_bo, struct radeon_winsys_bo
*dst_bo) +radv_prefer_compute_dma(const struct radv_device *device, uint64_t size, struct radeon_winsys_bo *src_bo, + struct radeon_winsys_bo *dst_bo) { bool use_compute = size >= RADV_BUFFER_OPS_CS_THRESHOLD; - if (device->physical_device->rad_info.gfx_level >= GFX10 && - device->physical_device->rad_info.has_dedicated_vram) { + if (device->physical_device->rad_info.gfx_level >= GFX10 && device->physical_device->rad_info.has_dedicated_vram) { if ((src_bo && !(src_bo->initial_domain & RADEON_DOMAIN_VRAM)) || (dst_bo && !(dst_bo->initial_domain & RADEON_DOMAIN_VRAM))) { /* Prefer CP DMA for GTT on dGPUS due to slow PCIe. */ @@ -246,8 +226,8 @@ radv_prefer_compute_dma(const struct radv_device *device, uint64_t size, } uint32_t -radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, - struct radeon_winsys_bo *bo, uint64_t va, uint64_t size, uint32_t value) +radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, struct radeon_winsys_bo *bo, + uint64_t va, uint64_t size, uint32_t value) { bool use_compute = radv_prefer_compute_dma(cmd_buffer->device, size, NULL, bo); uint32_t flush_bits = 0; @@ -259,8 +239,7 @@ radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo); if (use_compute) { - cmd_buffer->state.flush_bits |= - radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); + cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); fill_buffer_shader(cmd_buffer, va, size, value); @@ -273,9 +252,8 @@ radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im } void -radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *src_bo, - struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset, - uint64_t size) +radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *src_bo, struct radeon_winsys_bo *dst_bo, + uint64_t src_offset, uint64_t dst_offset, uint64_t size) { bool use_compute = !(size & 3) && !(src_offset & 3) && !(dst_offset & 3) && radv_prefer_compute_dma(cmd_buffer->device, size, src_bo, dst_bo); @@ -293,8 +271,8 @@ radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *sr } VKAPI_ATTR void VKAPI_CALL -radv_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, - VkDeviceSize fillSize, uint32_t data) +radv_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize fillSize, + uint32_t data) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer); @@ -302,13 +280,12 @@ radv_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSi fillSize = vk_buffer_range(&dst_buffer->vk, dstOffset, fillSize) & ~3ull; radv_fill_buffer(cmd_buffer, NULL, dst_buffer->bo, - radv_buffer_get_va(dst_buffer->bo) + dst_buffer->offset + dstOffset, fillSize, - data); + radv_buffer_get_va(dst_buffer->bo) + dst_buffer->offset + dstOffset, fillSize, data); } static void -copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *src_buffer, - struct radv_buffer *dst_buffer, const VkBufferCopy2 *region) +copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *src_buffer, struct radv_buffer *dst_buffer, + const VkBufferCopy2 *region) { bool old_predicating; @@ -318,9 +295,8 @@ copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct 
radv_buffer *src_buffer, old_predicating = cmd_buffer->state.predicating; cmd_buffer->state.predicating = false; - radv_copy_buffer(cmd_buffer, src_buffer->bo, dst_buffer->bo, - src_buffer->offset + region->srcOffset, dst_buffer->offset + region->dstOffset, - region->size); + radv_copy_buffer(cmd_buffer, src_buffer->bo, dst_buffer->bo, src_buffer->offset + region->srcOffset, + dst_buffer->offset + region->dstOffset, region->size); /* Restore conditional rendering. */ cmd_buffer->state.predicating = old_predicating; @@ -339,8 +315,7 @@ radv_CmdCopyBuffer2(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *pCop } void -radv_update_buffer_cp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, const void *data, - uint64_t size) +radv_update_buffer_cp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, const void *data, uint64_t size) { uint64_t words = size / 4; bool mec = radv_cmd_buffer_uses_mec(cmd_buffer); @@ -351,8 +326,8 @@ radv_update_buffer_cp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, const voi radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4); radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0)); - radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ? V_370_MEM : V_370_MEM_GRBM) | - S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); + radeon_emit(cmd_buffer->cs, + S_370_DST_SEL(mec ? V_370_MEM : V_370_MEM_GRBM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); radeon_emit(cmd_buffer->cs, va); radeon_emit(cmd_buffer->cs, va >> 32); radeon_emit_array(cmd_buffer->cs, data, words); @@ -362,8 +337,8 @@ radv_update_buffer_cp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, const voi } VKAPI_ATTR void VKAPI_CALL -radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, - VkDeviceSize dataSize, const void *pData) +radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, + const void *pData) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer); diff --git a/src/amd/vulkan/meta/radv_meta_bufimage.c b/src/amd/vulkan/meta/radv_meta_bufimage.c index eabeaa2..b5fe7cb 100644 --- a/src/amd/vulkan/meta/radv_meta_bufimage.c +++ b/src/amd/vulkan/meta/radv_meta_bufimage.c @@ -35,8 +35,7 @@ build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d) enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D; const struct glsl_type *sampler_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT); const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT); - nir_builder b = - radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_itob_cs_3d" : "meta_itob_cs"); + nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_itob_cs_3d" : "meta_itob_cs"); b.shader->info.workgroup_size[0] = 8; b.shader->info.workgroup_size[1] = 8; nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex"); @@ -49,13 +48,12 @@ build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d) nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2); - nir_ssa_def *offset = - nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8); + nir_ssa_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 
12 : 8); nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16); nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset); - nir_ssa_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), - nir_trim_vector(&b, img_coord, 2 + is_3d), NULL); + nir_ssa_def *outval = + nir_txf_deref(&b, nir_build_deref_var(&b, input_img), nir_trim_vector(&b, img_coord, 2 + is_3d), NULL); nir_ssa_def *pos_x = nir_channel(&b, global_id, 0); nir_ssa_def *pos_y = nir_channel(&b, global_id, 1); @@ -65,9 +63,8 @@ build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d) nir_ssa_def *coord = nir_replicate(&b, tmp, 4); - nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, - nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0), - .image_dim = GLSL_SAMPLER_DIM_BUF); + nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_ssa_undef(&b, 1, 32), outval, + nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF); return b.shader; } @@ -84,25 +81,23 @@ radv_device_init_meta_itob_state(struct radv_device *device) * two descriptors one for the image being sampled * one for the buffer being written. */ - VkDescriptorSetLayoutCreateInfo ds_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 2, - .pBindings = (VkDescriptorSetLayoutBinding[]){ - {.binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - {.binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - }}; - - result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, - &device->meta_state.alloc, + VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 2, + .pBindings = (VkDescriptorSetLayoutBinding[]){ + {.binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + {.binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + }}; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc, &device->meta_state.itob.img_ds_layout); if (result != VK_SUCCESS) goto fail; @@ -115,9 +110,8 @@ radv_device_init_meta_itob_state(struct radv_device *device) .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16}, }; - result = - radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, - &device->meta_state.alloc, &device->meta_state.itob.img_p_layout); + result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, + &device->meta_state.itob.img_p_layout); if (result != VK_SUCCESS) goto fail; @@ -138,8 +132,8 @@ radv_device_init_meta_itob_state(struct radv_device *device) .layout = device->meta_state.itob.img_p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &vk_pipeline_info, NULL, &device->meta_state.itob.pipeline); + result = 
radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, + NULL, &device->meta_state.itob.pipeline); if (result != VK_SUCCESS) goto fail; @@ -158,9 +152,8 @@ radv_device_init_meta_itob_state(struct radv_device *device) .layout = device->meta_state.itob.img_p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &vk_pipeline_info_3d, NULL, - &device->meta_state.itob.pipeline_3d); + result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info_3d, + NULL, &device->meta_state.itob.pipeline_3d); if (result != VK_SUCCESS) goto fail; @@ -179,10 +172,9 @@ radv_device_finish_meta_itob_state(struct radv_device *device) { struct radv_meta_state *state = &device->meta_state; - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itob.img_p_layout, - &state->alloc); - device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), - state->itob.img_ds_layout, &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itob.img_p_layout, &state->alloc); + device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itob.img_ds_layout, + &state->alloc); radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline, &state->alloc); radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline_3d, &state->alloc); } @@ -191,11 +183,9 @@ static nir_shader * build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d) { enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D; - const struct glsl_type *buf_type = - glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT); + const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT); const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT); - nir_builder b = - radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs"); + nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs"); b.shader->info.workgroup_size[0] = 8; b.shader->info.workgroup_size[1] = 8; nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex"); @@ -208,8 +198,7 @@ build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d) nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2); - nir_ssa_def *offset = - nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8); + nir_ssa_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8); nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16); nir_ssa_def *pos_x = nir_channel(&b, global_id, 0); @@ -221,13 +210,12 @@ build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d) nir_ssa_def *coord = nir_iadd(&b, global_id, offset); nir_ssa_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), buf_coord, NULL); - nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0), - nir_channel(&b, coord, 1), - is_3d ? nir_channel(&b, coord, 2) : nir_ssa_undef(&b, 1, 32), - nir_ssa_undef(&b, 1, 32)); + nir_ssa_def *img_coord = + nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), + is_3d ? 
nir_channel(&b, coord, 2) : nir_ssa_undef(&b, 1, 32), nir_ssa_undef(&b, 1, 32)); - nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, - nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0), .image_dim = dim); + nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32), + outval, nir_imm_int(&b, 0), .image_dim = dim); return b.shader; } @@ -243,25 +231,23 @@ radv_device_init_meta_btoi_state(struct radv_device *device) * two descriptors one for the image being sampled * one for the buffer being written. */ - VkDescriptorSetLayoutCreateInfo ds_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 2, - .pBindings = (VkDescriptorSetLayoutBinding[]){ - {.binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - {.binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - }}; - - result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, - &device->meta_state.alloc, + VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 2, + .pBindings = (VkDescriptorSetLayoutBinding[]){ + {.binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + {.binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + }}; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc, &device->meta_state.btoi.img_ds_layout); if (result != VK_SUCCESS) goto fail; @@ -274,9 +260,8 @@ radv_device_init_meta_btoi_state(struct radv_device *device) .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16}, }; - result = - radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, - &device->meta_state.alloc, &device->meta_state.btoi.img_p_layout); + result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, + &device->meta_state.btoi.img_p_layout); if (result != VK_SUCCESS) goto fail; @@ -297,8 +282,8 @@ radv_device_init_meta_btoi_state(struct radv_device *device) .layout = device->meta_state.btoi.img_p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &vk_pipeline_info, NULL, &device->meta_state.btoi.pipeline); + result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, + NULL, &device->meta_state.btoi.pipeline); if (result != VK_SUCCESS) goto fail; @@ -317,9 +302,8 @@ radv_device_init_meta_btoi_state(struct radv_device *device) .layout = device->meta_state.btoi.img_p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &vk_pipeline_info_3d, NULL, - &device->meta_state.btoi.pipeline_3d); + result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info_3d, + 
NULL, &device->meta_state.btoi.pipeline_3d); ralloc_free(cs_3d); ralloc_free(cs); @@ -336,10 +320,9 @@ radv_device_finish_meta_btoi_state(struct radv_device *device) { struct radv_meta_state *state = &device->meta_state; - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi.img_p_layout, - &state->alloc); - device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), - state->btoi.img_ds_layout, &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi.img_p_layout, &state->alloc); + device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->btoi.img_ds_layout, + &state->alloc); radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline, &state->alloc); radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline_3d, &state->alloc); } @@ -348,8 +331,7 @@ radv_device_finish_meta_btoi_state(struct radv_device *device) static nir_shader * build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev) { - const struct glsl_type *buf_type = - glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT); + const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT); const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT); nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_btoi_r32g32b32_cs"); b.shader->info.workgroup_size[0] = 8; @@ -386,9 +368,8 @@ build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev) nir_ssa_def *coord = nir_replicate(&b, local_pos, 4); - nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, - nir_ssa_undef(&b, 1, 32), nir_channel(&b, outval, chan), - nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF); + nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_ssa_undef(&b, 1, 32), + nir_channel(&b, outval, chan), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF); } return b.shader; @@ -400,25 +381,23 @@ radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device) VkResult result; nir_shader *cs = build_nir_btoi_r32g32b32_compute_shader(device); - VkDescriptorSetLayoutCreateInfo ds_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 2, - .pBindings = (VkDescriptorSetLayoutBinding[]){ - {.binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - {.binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - }}; - - result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, - &device->meta_state.alloc, + VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 2, + .pBindings = (VkDescriptorSetLayoutBinding[]){ + {.binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + {.binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + 
.pImmutableSamplers = NULL}, + }}; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc, &device->meta_state.btoi_r32g32b32.img_ds_layout); if (result != VK_SUCCESS) goto fail; @@ -431,8 +410,7 @@ radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device) .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16}, }; - result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, - &device->meta_state.alloc, + result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, &device->meta_state.btoi_r32g32b32.img_p_layout); if (result != VK_SUCCESS) goto fail; @@ -454,9 +432,8 @@ radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device) .layout = device->meta_state.btoi_r32g32b32.img_p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &vk_pipeline_info, NULL, - &device->meta_state.btoi_r32g32b32.pipeline); + result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, + NULL, &device->meta_state.btoi_r32g32b32.pipeline); fail: ralloc_free(cs); @@ -468,12 +445,10 @@ radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device) { struct radv_meta_state *state = &device->meta_state; - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi_r32g32b32.img_p_layout, - &state->alloc); - device->vk.dispatch_table.DestroyDescriptorSetLayout( - radv_device_to_handle(device), state->btoi_r32g32b32.img_ds_layout, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), state->btoi_r32g32b32.pipeline, - &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi_r32g32b32.img_p_layout, &state->alloc); + device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), + state->btoi_r32g32b32.img_ds_layout, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->btoi_r32g32b32.pipeline, &state->alloc); } static nir_shader * @@ -485,8 +460,8 @@ build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d, int samples) : GLSL_SAMPLER_DIM_2D; const struct glsl_type *buf_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT); const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT); - nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, - is_3d ? "meta_itoi_cs_3d-%d" : "meta_itoi_cs-%d", samples); + nir_builder b = + radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_itoi_cs_3d-%d" : "meta_itoi_cs-%d", samples); b.shader->info.workgroup_size[0] = 8; b.shader->info.workgroup_size[1] = 8; nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex"); @@ -499,8 +474,7 @@ build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d, int samples) nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2); - nir_ssa_def *src_offset = - nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8); + nir_ssa_def *src_offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8); nir_ssa_def *dst_offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 12), .range = is_3d ? 
24 : 20); @@ -512,22 +486,19 @@ build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d, int samples) nir_ssa_def *tex_vals[8]; if (is_multisampled) { for (uint32_t i = 0; i < samples; i++) { - tex_vals[i] = nir_txf_ms_deref(&b, input_img_deref, nir_trim_vector(&b, src_coord, 2), - nir_imm_int(&b, i)); + tex_vals[i] = nir_txf_ms_deref(&b, input_img_deref, nir_trim_vector(&b, src_coord, 2), nir_imm_int(&b, i)); } } else { - tex_vals[0] = nir_txf_deref(&b, input_img_deref, nir_trim_vector(&b, src_coord, 2 + is_3d), - nir_imm_int(&b, 0)); + tex_vals[0] = nir_txf_deref(&b, input_img_deref, nir_trim_vector(&b, src_coord, 2 + is_3d), nir_imm_int(&b, 0)); } - nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), - nir_channel(&b, dst_coord, 1), - is_3d ? nir_channel(&b, dst_coord, 2) : nir_ssa_undef(&b, 1, 32), - nir_ssa_undef(&b, 1, 32)); + nir_ssa_def *img_coord = + nir_vec4(&b, nir_channel(&b, dst_coord, 0), nir_channel(&b, dst_coord, 1), + is_3d ? nir_channel(&b, dst_coord, 2) : nir_ssa_undef(&b, 1, 32), nir_ssa_undef(&b, 1, 32)); for (uint32_t i = 0; i < samples; i++) { - nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, - nir_imm_int(&b, i), tex_vals[i], nir_imm_int(&b, 0), .image_dim = dim); + nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_imm_int(&b, i), + tex_vals[i], nir_imm_int(&b, 0), .image_dim = dim); } return b.shader; @@ -555,8 +526,8 @@ create_itoi_pipeline(struct radv_device *device, int samples, VkPipeline *pipeli .layout = state->itoi.img_p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, - &vk_pipeline_info, NULL, pipeline); + result = + radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, &vk_pipeline_info, NULL, pipeline); ralloc_free(cs); return result; } @@ -571,25 +542,23 @@ radv_device_init_meta_itoi_state(struct radv_device *device) * two descriptors one for the image being sampled * one for the buffer being written. 
*/ - VkDescriptorSetLayoutCreateInfo ds_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 2, - .pBindings = (VkDescriptorSetLayoutBinding[]){ - {.binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - {.binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - }}; - - result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, - &device->meta_state.alloc, + VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 2, + .pBindings = (VkDescriptorSetLayoutBinding[]){ + {.binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + {.binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + }}; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc, &device->meta_state.itoi.img_ds_layout); if (result != VK_SUCCESS) goto fail; @@ -602,9 +571,8 @@ radv_device_init_meta_itoi_state(struct radv_device *device) .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24}, }; - result = - radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, - &device->meta_state.alloc, &device->meta_state.itoi.img_p_layout); + result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, + &device->meta_state.itoi.img_p_layout); if (result != VK_SUCCESS) goto fail; @@ -632,9 +600,8 @@ radv_device_init_meta_itoi_state(struct radv_device *device) .layout = device->meta_state.itoi.img_p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &vk_pipeline_info_3d, NULL, - &device->meta_state.itoi.pipeline_3d); + result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info_3d, + NULL, &device->meta_state.itoi.pipeline_3d); ralloc_free(cs_3d); return VK_SUCCESS; @@ -647,10 +614,9 @@ radv_device_finish_meta_itoi_state(struct radv_device *device) { struct radv_meta_state *state = &device->meta_state; - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi.img_p_layout, - &state->alloc); - device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), - state->itoi.img_ds_layout, &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi.img_p_layout, &state->alloc); + device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itoi.img_ds_layout, + &state->alloc); for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) { radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline[i], &state->alloc); @@ -662,8 +628,7 @@ radv_device_finish_meta_itoi_state(struct radv_device *device) static nir_shader * build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev) { - const struct glsl_type *type = - glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, 
GLSL_TYPE_FLOAT); + const struct glsl_type *type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT); const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT); nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_itoi_r32g32b32_cs"); b.shader->info.workgroup_size[0] = 8; @@ -672,8 +637,7 @@ build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev) input_img->data.descriptor_set = 0; input_img->data.binding = 0; - nir_variable *output_img = - nir_variable_create(b.shader, nir_var_image, img_type, "output_img"); + nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "output_img"); output_img->data.descriptor_set = 0; output_img->data.binding = 1; @@ -688,29 +652,24 @@ build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev) nir_ssa_def *src_img_coord = nir_iadd(&b, global_id, src_offset); nir_ssa_def *dst_img_coord = nir_iadd(&b, global_id, dst_offset); - nir_ssa_def *src_global_pos = - nir_iadd(&b, nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride), - nir_imul_imm(&b, nir_channel(&b, src_img_coord, 0), 3)); + nir_ssa_def *src_global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride), + nir_imul_imm(&b, nir_channel(&b, src_img_coord, 0), 3)); - nir_ssa_def *dst_global_pos = - nir_iadd(&b, nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride), - nir_imul_imm(&b, nir_channel(&b, dst_img_coord, 0), 3)); + nir_ssa_def *dst_global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride), + nir_imul_imm(&b, nir_channel(&b, dst_img_coord, 0), 3)); for (int chan = 0; chan < 3; chan++) { /* src */ nir_ssa_def *src_local_pos = nir_iadd_imm(&b, src_global_pos, chan); - nir_ssa_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), src_local_pos, - NULL); + nir_ssa_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), src_local_pos, NULL); /* dst */ nir_ssa_def *dst_local_pos = nir_iadd_imm(&b, dst_global_pos, chan); - nir_ssa_def *dst_coord = - nir_replicate(&b, dst_local_pos, 4); + nir_ssa_def *dst_coord = nir_replicate(&b, dst_local_pos, 4); - nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord, - nir_ssa_undef(&b, 1, 32), nir_channel(&b, outval, 0), - nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF); + nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord, nir_ssa_undef(&b, 1, 32), + nir_channel(&b, outval, 0), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF); } return b.shader; @@ -723,25 +682,23 @@ radv_device_init_meta_itoi_r32g32b32_state(struct radv_device *device) VkResult result; nir_shader *cs = build_nir_itoi_r32g32b32_compute_shader(device); - VkDescriptorSetLayoutCreateInfo ds_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 2, - .pBindings = (VkDescriptorSetLayoutBinding[]){ - {.binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - {.binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - }}; - - result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, - &device->meta_state.alloc, + VkDescriptorSetLayoutCreateInfo 
ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 2, + .pBindings = (VkDescriptorSetLayoutBinding[]){ + {.binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + {.binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + }}; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc, &device->meta_state.itoi_r32g32b32.img_ds_layout); if (result != VK_SUCCESS) goto fail; @@ -754,8 +711,7 @@ radv_device_init_meta_itoi_r32g32b32_state(struct radv_device *device) .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24}, }; - result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, - &device->meta_state.alloc, + result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, &device->meta_state.itoi_r32g32b32.img_p_layout); if (result != VK_SUCCESS) goto fail; @@ -777,9 +733,8 @@ radv_device_init_meta_itoi_r32g32b32_state(struct radv_device *device) .layout = device->meta_state.itoi_r32g32b32.img_p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &vk_pipeline_info, NULL, - &device->meta_state.itoi_r32g32b32.pipeline); + result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, + NULL, &device->meta_state.itoi_r32g32b32.pipeline); fail: ralloc_free(cs); @@ -791,12 +746,10 @@ radv_device_finish_meta_itoi_r32g32b32_state(struct radv_device *device) { struct radv_meta_state *state = &device->meta_state; - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi_r32g32b32.img_p_layout, - &state->alloc); - device->vk.dispatch_table.DestroyDescriptorSetLayout( - radv_device_to_handle(device), state->itoi_r32g32b32.img_ds_layout, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), state->itoi_r32g32b32.pipeline, - &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi_r32g32b32.img_p_layout, &state->alloc); + device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), + state->itoi_r32g32b32.img_ds_layout, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->itoi_r32g32b32.pipeline, &state->alloc); } static nir_shader * @@ -807,8 +760,8 @@ build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d, int samples : is_multisampled ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D; const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT); - nir_builder b = radv_meta_init_shader( - dev, MESA_SHADER_COMPUTE, is_3d ? "meta_cleari_cs_3d-%d" : "meta_cleari_cs-%d", samples); + nir_builder b = + radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? 
"meta_cleari_cs_3d-%d" : "meta_cleari_cs-%d", samples); b.shader->info.workgroup_size[0] = 8; b.shader->info.workgroup_size[1] = 8; @@ -829,8 +782,8 @@ build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d, int samples global_id = nir_vec(&b, comps, 4); for (uint32_t i = 0; i < samples; i++) { - nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id, - nir_imm_int(&b, i), clear_val, nir_imm_int(&b, 0), .image_dim = dim); + nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id, nir_imm_int(&b, i), + clear_val, nir_imm_int(&b, 0), .image_dim = dim); } return b.shader; @@ -857,8 +810,8 @@ create_cleari_pipeline(struct radv_device *device, int samples, VkPipeline *pipe .layout = device->meta_state.cleari.img_p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &vk_pipeline_info, NULL, pipeline); + result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, + NULL, pipeline); ralloc_free(cs); return result; } @@ -872,20 +825,18 @@ radv_device_init_meta_cleari_state(struct radv_device *device) * two descriptors one for the image being sampled * one for the buffer being written. */ - VkDescriptorSetLayoutCreateInfo ds_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 1, - .pBindings = (VkDescriptorSetLayoutBinding[]){ - {.binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - }}; - - result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, - &device->meta_state.alloc, + VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]){ + {.binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + }}; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc, &device->meta_state.cleari.img_ds_layout); if (result != VK_SUCCESS) goto fail; @@ -898,9 +849,8 @@ radv_device_init_meta_cleari_state(struct radv_device *device) .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20}, }; - result = - radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, - &device->meta_state.alloc, &device->meta_state.cleari.img_p_layout); + result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, + &device->meta_state.cleari.img_p_layout); if (result != VK_SUCCESS) goto fail; @@ -929,9 +879,8 @@ radv_device_init_meta_cleari_state(struct radv_device *device) .layout = device->meta_state.cleari.img_p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &vk_pipeline_info_3d, NULL, - &device->meta_state.cleari.pipeline_3d); + result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info_3d, + NULL, &device->meta_state.cleari.pipeline_3d); ralloc_free(cs_3d); return VK_SUCCESS; @@ -944,10 +893,9 @@ 
radv_device_finish_meta_cleari_state(struct radv_device *device) { struct radv_meta_state *state = &device->meta_state; - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari.img_p_layout, - &state->alloc); - device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), - state->cleari.img_ds_layout, &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari.img_p_layout, &state->alloc); + device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->cleari.img_ds_layout, + &state->alloc); for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) { radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline[i], &state->alloc); @@ -977,17 +925,15 @@ build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev) nir_ssa_def *global_x = nir_channel(&b, global_id, 0); nir_ssa_def *global_y = nir_channel(&b, global_id, 1); - nir_ssa_def *global_pos = - nir_iadd(&b, nir_imul(&b, global_y, stride), nir_imul_imm(&b, global_x, 3)); + nir_ssa_def *global_pos = nir_iadd(&b, nir_imul(&b, global_y, stride), nir_imul_imm(&b, global_x, 3)); for (unsigned chan = 0; chan < 3; chan++) { nir_ssa_def *local_pos = nir_iadd_imm(&b, global_pos, chan); nir_ssa_def *coord = nir_replicate(&b, local_pos, 4); - nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, - nir_ssa_undef(&b, 1, 32), nir_channel(&b, clear_val, chan), - nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF); + nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_ssa_undef(&b, 1, 32), + nir_channel(&b, clear_val, chan), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF); } return b.shader; @@ -999,20 +945,18 @@ radv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device) VkResult result; nir_shader *cs = build_nir_cleari_r32g32b32_compute_shader(device); - VkDescriptorSetLayoutCreateInfo ds_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 1, - .pBindings = (VkDescriptorSetLayoutBinding[]){ - {.binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - }}; - - result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, - &device->meta_state.alloc, + VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]){ + {.binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + }}; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc, &device->meta_state.cleari_r32g32b32.img_ds_layout); if (result != VK_SUCCESS) goto fail; @@ -1025,8 +969,7 @@ radv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device) .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16}, }; - result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, - &device->meta_state.alloc, + result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, 
&device->meta_state.cleari_r32g32b32.img_p_layout); if (result != VK_SUCCESS) goto fail; @@ -1047,9 +990,8 @@ radv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device) .layout = device->meta_state.cleari_r32g32b32.img_p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &vk_pipeline_info, NULL, - &device->meta_state.cleari_r32g32b32.pipeline); + result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, + NULL, &device->meta_state.cleari_r32g32b32.pipeline); fail: ralloc_free(cs); @@ -1061,12 +1003,10 @@ radv_device_finish_meta_cleari_r32g32b32_state(struct radv_device *device) { struct radv_meta_state *state = &device->meta_state; - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari_r32g32b32.img_p_layout, - &state->alloc); - device->vk.dispatch_table.DestroyDescriptorSetLayout( - radv_device_to_handle(device), state->cleari_r32g32b32.img_ds_layout, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), state->cleari_r32g32b32.pipeline, - &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari_r32g32b32.img_p_layout, &state->alloc); + device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), + state->cleari_r32g32b32.img_ds_layout, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->cleari_r32g32b32.pipeline, &state->alloc); } void @@ -1118,8 +1058,8 @@ radv_device_init_meta_bufimage_state(struct radv_device *device) } static void -create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf, - struct radv_image_view *iview, VkFormat format, VkImageAspectFlagBits aspects) +create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf, struct radv_image_view *iview, + VkFormat format, VkImageAspectFlagBits aspects) { if (format == VK_FORMAT_UNDEFINED) format = surf->format; @@ -1136,14 +1076,15 @@ create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *s .baseArrayLayer = surf->layer, .layerCount = 1}, }, - 0, &(struct radv_image_view_extra_create_info){ + 0, + &(struct radv_image_view_extra_create_info){ .disable_compression = surf->disable_compression, }); } static void -create_bview(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, unsigned offset, - VkFormat format, struct radv_buffer_view *bview) +create_bview(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, unsigned offset, VkFormat format, + struct radv_buffer_view *bview) { radv_buffer_view_init(bview, cmd_buffer->device, &(VkBufferViewCreateInfo){ @@ -1187,8 +1128,8 @@ create_buffer_from_image(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_bl } static void -create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, - unsigned offset, VkFormat src_format, struct radv_buffer_view *bview) +create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, unsigned offset, + VkFormat src_format, struct radv_buffer_view *bview) { VkFormat format; @@ -1227,10 +1168,9 @@ create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_buffe * image view descriptors instead. 
*/ static void -fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, - const struct radv_meta_blit2d_buffer *buf_bsurf, - const struct radv_meta_blit2d_surf *img_bsurf, - const struct radv_meta_blit2d_rect *rect, bool to_image) +fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_meta_blit2d_buffer *buf_bsurf, + const struct radv_meta_blit2d_surf *img_bsurf, const struct radv_meta_blit2d_rect *rect, + bool to_image) { const unsigned mip_level = img_bsurf->level; const struct radv_image *image = img_bsurf->image; @@ -1241,8 +1181,7 @@ fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, struct ac_surf_info surf_info = radv_get_ac_surf_info(device, image); /* GFX10 will use a different workaround unless this is not a 2D image */ - if (rad_info->gfx_level < GFX9 || - (rad_info->gfx_level >= GFX10 && image->vk.image_type == VK_IMAGE_TYPE_2D) || + if (rad_info->gfx_level < GFX9 || (rad_info->gfx_level >= GFX10 && image->vk.image_type == VK_IMAGE_TYPE_2D) || image->vk.mip_levels == 1 || !vk_format_is_block_compressed(image->vk.format)) return; @@ -1258,8 +1197,7 @@ fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, /* The actual extent we want to copy */ VkExtent2D mip_extent = {rect->width, rect->height}; - VkOffset2D mip_offset = {to_image ? rect->dst_x : rect->src_x, - to_image ? rect->dst_y : rect->src_y}; + VkOffset2D mip_offset = {to_image ? rect->dst_x : rect->src_x, to_image ? rect->dst_y : rect->src_y}; if (hw_mip_extent.width >= mip_offset.x + mip_extent.width && hw_mip_extent.height >= mip_offset.y + mip_extent.height) @@ -1271,8 +1209,7 @@ fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, * while we're fixing them. If we're writing to an image, we do not need * to wait because the compute shader cannot write to those texels */ - cmd_buffer->state.flush_bits |= - RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE; + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE; } for (uint32_t y = 0; y < mip_extent.height; y++) { @@ -1284,17 +1221,14 @@ fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, uint32_t x = (coordY < hw_mip_extent.height) ? 
hw_mip_extent.width : 0; for (; x < mip_extent.width; x++) { uint32_t coordX = x + mip_offset.x; - uint64_t addr = ac_surface_addr_from_coord(addrlib, rad_info, surf, &surf_info, - mip_level, coordX, coordY, img_bsurf->layer, - image->vk.image_type == VK_IMAGE_TYPE_3D); + uint64_t addr = ac_surface_addr_from_coord(addrlib, rad_info, surf, &surf_info, mip_level, coordX, coordY, + img_bsurf->layer, image->vk.image_type == VK_IMAGE_TYPE_3D); struct radeon_winsys_bo *img_bo = image->bindings[0].bo; struct radeon_winsys_bo *mem_bo = buf_bsurf->buffer->bo; const uint64_t img_offset = image->bindings[0].offset + addr; /* buf_bsurf->offset already includes the layer offset */ - const uint64_t mem_offset = buf_bsurf->buffer->offset + - buf_bsurf->offset + - y * buf_bsurf->pitch * surf->bpe + - x * surf->bpe; + const uint64_t mem_offset = + buf_bsurf->buffer->offset + buf_bsurf->offset + y * buf_bsurf->pitch * surf->bpe + x * surf->bpe; if (to_image) { radv_copy_buffer(cmd_buffer, mem_bo, img_bo, mem_offset, img_offset, surf->bpe); } else { @@ -1305,8 +1239,7 @@ fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, } static unsigned -get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_blit2d_surf *surf) +get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf) { unsigned stride; @@ -1320,42 +1253,39 @@ get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, } static void -itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src, - struct radv_buffer_view *dst) +itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src, struct radv_buffer_view *dst) { struct radv_device *device = cmd_buffer->device; radv_meta_push_descriptor_set( cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itob.img_p_layout, 0, /* set */ - 2, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]){ - {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = - (VkDescriptorImageInfo[]){ - { - .sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(src), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - }}, - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, - .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)}, - }}); + 2, /* descriptorWriteCount */ + (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = + (VkDescriptorImageInfo[]){ + { + .sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(src), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + }}, + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)}, + }}); } void radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src, - struct radv_meta_blit2d_buffer *dst, unsigned num_rects, - struct radv_meta_blit2d_rect *rects) + struct radv_meta_blit2d_buffer *dst, unsigned num_rects, struct radv_meta_blit2d_rect *rects) { VkPipeline 
pipeline = cmd_buffer->device->meta_state.itob.pipeline; struct radv_device *device = cmd_buffer->device; @@ -1369,14 +1299,12 @@ radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_b if (src->image->vk.image_type == VK_IMAGE_TYPE_3D) pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d; - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); for (unsigned r = 0; r < num_rects; ++r) { unsigned push_constants[4] = {rects[r].src_x, rects[r].src_y, src->layer, dst->pitch}; - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.itob.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, - 16, push_constants); + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.itob.img_p_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants); radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1); fixup_gfx9_cs_copy(cmd_buffer, dst, src, &rects[r], false); @@ -1393,31 +1321,28 @@ btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_ struct radv_device *device = cmd_buffer->device; radv_meta_push_descriptor_set( - cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi_r32g32b32.img_p_layout, - 0, /* set */ + cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi_r32g32b32.img_p_layout, 0, /* set */ 2, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]){ - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, - .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)}, - }, - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, - .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)}, - }}); + (VkWriteDescriptorSet[]){{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)}, + }, + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)}, + }}); } static void -radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_blit2d_buffer *src, +radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_buffer *src, struct radv_meta_blit2d_surf *dst, unsigned num_rects, struct radv_meta_blit2d_rect *rects) { @@ -1435,12 +1360,10 @@ radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &buffer); create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view); - create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), dst_offset, dst->format, - &dst_view); + create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), dst_offset, dst->format, &dst_view); btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view); - 
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); stride = get_image_stride_for_r32g32b32(cmd_buffer, dst); @@ -1452,8 +1375,7 @@ radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, src->pitch, }; - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.btoi_r32g32b32.img_p_layout, + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.btoi_r32g32b32.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants); radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1); @@ -1465,49 +1387,45 @@ radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, } static void -btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src, - struct radv_image_view *dst) +btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src, struct radv_image_view *dst) { struct radv_device *device = cmd_buffer->device; radv_meta_push_descriptor_set( cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi.img_p_layout, 0, /* set */ - 2, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]){ - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, - .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)}, - }, - {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]){ - { - .sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(dst), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - }}}); + 2, /* descriptorWriteCount */ + (VkWriteDescriptorSet[]){{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)}, + }, + {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]){ + { + .sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(dst), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + }}}); } void -radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_blit2d_buffer *src, struct radv_meta_blit2d_surf *dst, - unsigned num_rects, struct radv_meta_blit2d_rect *rects) +radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_buffer *src, + struct radv_meta_blit2d_surf *dst, unsigned num_rects, struct radv_meta_blit2d_rect *rects) { VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline; struct radv_device *device = cmd_buffer->device; struct radv_buffer_view src_view; struct radv_image_view dst_view; - if (dst->image->vk.format == VK_FORMAT_R32G32B32_UINT || - dst->image->vk.format == VK_FORMAT_R32G32B32_SINT || + if (dst->image->vk.format == VK_FORMAT_R32G32B32_UINT || dst->image->vk.format == VK_FORMAT_R32G32B32_SINT || dst->image->vk.format == VK_FORMAT_R32G32B32_SFLOAT) { radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst, num_rects, rects); 
return; @@ -1519,8 +1437,7 @@ radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer, if (dst->image->vk.image_type == VK_IMAGE_TYPE_3D) pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d; - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); for (unsigned r = 0; r < num_rects; ++r) { unsigned push_constants[4] = { @@ -1529,9 +1446,8 @@ radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer, dst->layer, src->pitch, }; - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.btoi.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, - 16, push_constants); + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.btoi.img_p_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants); radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1); fixup_gfx9_cs_copy(cmd_buffer, src, dst, &rects[r], true); @@ -1548,31 +1464,28 @@ itoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_ struct radv_device *device = cmd_buffer->device; radv_meta_push_descriptor_set( - cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi_r32g32b32.img_p_layout, - 0, /* set */ + cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi_r32g32b32.img_p_layout, 0, /* set */ 2, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]){ - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, - .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)}, - }, - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, - .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)}, - }}); + (VkWriteDescriptorSet[]){{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)}, + }, + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)}, + }}); } static void -radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_blit2d_surf *src, +radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src, struct radv_meta_blit2d_surf *dst, unsigned num_rects, struct radv_meta_blit2d_rect *rects) { @@ -1594,14 +1507,11 @@ radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, create_buffer_from_image(cmd_buffer, src, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, &src_buffer); create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &dst_buffer); - create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer), src_offset, - src->format, &src_view); - create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer), dst_offset, - dst->format, &dst_view); + create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer), src_offset, src->format, 
&src_view); + create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer), dst_offset, dst->format, &dst_view); itoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view); - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); src_stride = get_image_stride_for_r32g32b32(cmd_buffer, src); dst_stride = get_image_stride_for_r32g32b32(cmd_buffer, dst); @@ -1610,8 +1520,7 @@ radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, unsigned push_constants[6] = { rects[r].src_x, rects[r].src_y, src_stride, rects[r].dst_x, rects[r].dst_y, dst_stride, }; - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.itoi_r32g32b32.img_p_layout, + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.itoi_r32g32b32.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 24, push_constants); radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1); @@ -1624,45 +1533,43 @@ radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, } static void -itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src, - struct radv_image_view *dst) +itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src, struct radv_image_view *dst) { struct radv_device *device = cmd_buffer->device; - radv_meta_push_descriptor_set( - cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi.img_p_layout, 0, /* set */ - 2, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = - (VkDescriptorImageInfo[]){ - { - .sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(src), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - }}, - {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]){ - { - .sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(dst), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - }}}); + radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi.img_p_layout, + 0, /* set */ + 2, /* descriptorWriteCount */ + (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = + (VkDescriptorImageInfo[]){ + { + .sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(src), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + }}, + {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]){ + { + .sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(dst), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + }}}); } void radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src, - struct radv_meta_blit2d_surf *dst, unsigned num_rects, - struct radv_meta_blit2d_rect *rects) + struct radv_meta_blit2d_surf *dst, unsigned num_rects, struct 
radv_meta_blit2d_rect *rects) { struct radv_device *device = cmd_buffer->device; struct radv_image_view src_view, dst_view; @@ -1675,7 +1582,7 @@ radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta return; } - u_foreach_bit(i, dst->aspect_mask) { + u_foreach_bit (i, dst->aspect_mask) { unsigned aspect_mask = 1u << i; VkFormat depth_format = 0; if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) @@ -1689,19 +1596,16 @@ radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view); VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline[samples_log2]; - if (src->image->vk.image_type == VK_IMAGE_TYPE_3D || - dst->image->vk.image_type == VK_IMAGE_TYPE_3D) + if (src->image->vk.image_type == VK_IMAGE_TYPE_3D || dst->image->vk.image_type == VK_IMAGE_TYPE_3D) pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d; - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); for (unsigned r = 0; r < num_rects; ++r) { unsigned push_constants[6] = { rects[r].src_x, rects[r].src_y, src->layer, rects[r].dst_x, rects[r].dst_y, dst->layer, }; - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.itoi.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, - 24, push_constants); + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.itoi.img_p_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, 24, push_constants); radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1); } @@ -1716,23 +1620,21 @@ cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct rad { struct radv_device *device = cmd_buffer->device; - radv_meta_push_descriptor_set( - cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.cleari_r32g32b32.img_p_layout, - 0, /* set */ - 1, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]){{ - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, - .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(view)}, - }}); + radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + device->meta_state.cleari_r32g32b32.img_p_layout, 0, /* set */ + 1, /* descriptorWriteCount */ + (VkWriteDescriptorSet[]){{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(view)}, + }}); } static void -radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, - struct radv_meta_blit2d_surf *dst, +radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst, const VkClearColorValue *clear_color) { VkPipeline pipeline = cmd_buffer->device->meta_state.cleari_r32g32b32.pipeline; @@ -1747,12 +1649,10 @@ radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, */ create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &buffer); - create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), 0, dst->format, - &dst_view); + create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), 0, dst->format, &dst_view); 
cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view); - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); stride = get_image_stride_for_r32g32b32(cmd_buffer, dst); @@ -1763,8 +1663,7 @@ radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, stride, }; - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.cleari_r32g32b32.img_p_layout, + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.cleari_r32g32b32.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants); radv_unaligned_dispatch(cmd_buffer, dst->image->vk.extent.width, dst->image->vk.extent.height, 1); @@ -1778,8 +1677,8 @@ cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_vi { struct radv_device *device = cmd_buffer->device; - radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - device->meta_state.cleari.img_p_layout, 0, /* set */ + radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.cleari.img_p_layout, + 0, /* set */ 1, /* descriptorWriteCount */ (VkWriteDescriptorSet[]){ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, @@ -1820,20 +1719,14 @@ radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_bl if (dst->image->vk.image_type == VK_IMAGE_TYPE_3D) pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d; - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); unsigned push_constants[5] = { - clear_color->uint32[0], - clear_color->uint32[1], - clear_color->uint32[2], - clear_color->uint32[3], - dst->layer, + clear_color->uint32[0], clear_color->uint32[1], clear_color->uint32[2], clear_color->uint32[3], dst->layer, }; - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.cleari.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 20, - push_constants); + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.cleari.img_p_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, 20, push_constants); radv_unaligned_dispatch(cmd_buffer, dst->image->vk.extent.width, dst->image->vk.extent.height, 1); diff --git a/src/amd/vulkan/meta/radv_meta_clear.c b/src/amd/vulkan/meta/radv_meta_clear.c index 8f34b8a..f6849dd 100644 --- a/src/amd/vulkan/meta/radv_meta_clear.c +++ b/src/amd/vulkan/meta/radv_meta_clear.c @@ -27,8 +27,8 @@ #include "radv_private.h" #include "util/format_rgb9e5.h" -#include "vk_format.h" #include "vk_common_entrypoints.h" +#include "vk_format.h" enum { DEPTH_CLEAR_SLOW, DEPTH_CLEAR_FAST }; @@ -37,21 +37,17 @@ build_color_shaders(struct radv_device *dev, struct nir_shader **out_vs, struct uint32_t frag_output) { nir_builder vs_b = radv_meta_init_shader(dev, MESA_SHADER_VERTEX, "meta_clear_color_vs"); - nir_builder fs_b = - radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_clear_color_fs-%d", frag_output); + nir_builder fs_b = radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_clear_color_fs-%d", frag_output); const struct glsl_type *position_type = glsl_vec4_type(); const struct glsl_type *color_type = glsl_vec4_type(); - nir_variable *vs_out_pos = - nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, "gl_Position"); + nir_variable *vs_out_pos = 
nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, "gl_Position"); vs_out_pos->data.location = VARYING_SLOT_POS; - nir_ssa_def *in_color_load = - nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range = 16); + nir_ssa_def *in_color_load = nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range = 16); - nir_variable *fs_out_color = - nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, "f_color"); + nir_variable *fs_out_color = nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, "f_color"); fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output; nir_store_var(&fs_b, fs_out_color, in_color_load, 0xf); @@ -60,8 +56,7 @@ build_color_shaders(struct radv_device *dev, struct nir_shader **out_vs, struct nir_store_var(&vs_b, vs_out_pos, outvec, 0xf); const struct glsl_type *layer_type = glsl_int_type(); - nir_variable *vs_out_layer = - nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer"); + nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer"); vs_out_layer->data.location = VARYING_SLOT_LAYER; vs_out_layer->data.interpolation = INTERP_MODE_FLAT; nir_ssa_def *inst_id = nir_load_instance_id(&vs_b); @@ -75,94 +70,90 @@ build_color_shaders(struct radv_device *dev, struct nir_shader **out_vs, struct } static VkResult -create_pipeline(struct radv_device *device, uint32_t samples, - struct nir_shader *vs_nir, struct nir_shader *fs_nir, +create_pipeline(struct radv_device *device, uint32_t samples, struct nir_shader *vs_nir, struct nir_shader *fs_nir, const VkPipelineVertexInputStateCreateInfo *vi_state, const VkPipelineDepthStencilStateCreateInfo *ds_state, - const VkPipelineColorBlendStateCreateInfo *cb_state, - const VkPipelineRenderingCreateInfo *dyn_state, - const VkPipelineLayout layout, - const struct radv_graphics_pipeline_create_info *extra, + const VkPipelineColorBlendStateCreateInfo *cb_state, const VkPipelineRenderingCreateInfo *dyn_state, + const VkPipelineLayout layout, const struct radv_graphics_pipeline_create_info *extra, const VkAllocationCallbacks *alloc, VkPipeline *pipeline) { VkDevice device_h = radv_device_to_handle(device); VkResult result; - result = radv_graphics_pipeline_create( - device_h, device->meta_state.cache, - &(VkGraphicsPipelineCreateInfo){ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = dyn_state, - .stageCount = fs_nir ? 
2 : 1, - .pStages = - (VkPipelineShaderStageCreateInfo[]){ - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = vk_shader_module_handle_from_nir(vs_nir), - .pName = "main", - }, - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = vk_shader_module_handle_from_nir(fs_nir), - .pName = "main", - }, - }, - .pVertexInputState = vi_state, - .pInputAssemblyState = - &(VkPipelineInputAssemblyStateCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .primitiveRestartEnable = false, - }, - .pViewportState = - &(VkPipelineViewportStateCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .scissorCount = 1, - }, - .pRasterizationState = - &(VkPipelineRasterizationStateCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .rasterizerDiscardEnable = false, - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, - .depthBiasEnable = false, - .depthBiasConstantFactor = 0.0f, - .depthBiasClamp = 0.0f, - .depthBiasSlopeFactor = 0.0f, - .lineWidth = 1.0f, - }, - .pMultisampleState = - &(VkPipelineMultisampleStateCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .rasterizationSamples = samples, - .sampleShadingEnable = false, - .pSampleMask = NULL, - .alphaToCoverageEnable = false, - .alphaToOneEnable = false, - }, - .pDepthStencilState = ds_state, - .pColorBlendState = cb_state, - .pDynamicState = - &(VkPipelineDynamicStateCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 3, - .pDynamicStates = - (VkDynamicState[]){ - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - VK_DYNAMIC_STATE_STENCIL_REFERENCE, - }, - }, - .layout = layout, - .flags = 0, - .renderPass = VK_NULL_HANDLE, - .subpass = 0, - }, - extra, alloc, pipeline); + result = radv_graphics_pipeline_create(device_h, device->meta_state.cache, + &(VkGraphicsPipelineCreateInfo){ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = dyn_state, + .stageCount = fs_nir ? 
2 : 1, + .pStages = + (VkPipelineShaderStageCreateInfo[]){ + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = vk_shader_module_handle_from_nir(vs_nir), + .pName = "main", + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = vk_shader_module_handle_from_nir(fs_nir), + .pName = "main", + }, + }, + .pVertexInputState = vi_state, + .pInputAssemblyState = + &(VkPipelineInputAssemblyStateCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = + &(VkPipelineViewportStateCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, + .pRasterizationState = + &(VkPipelineRasterizationStateCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, + .depthBiasEnable = false, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f, + }, + .pMultisampleState = + &(VkPipelineMultisampleStateCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = samples, + .sampleShadingEnable = false, + .pSampleMask = NULL, + .alphaToCoverageEnable = false, + .alphaToOneEnable = false, + }, + .pDepthStencilState = ds_state, + .pColorBlendState = cb_state, + .pDynamicState = + &(VkPipelineDynamicStateCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 3, + .pDynamicStates = + (VkDynamicState[]){ + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }, + }, + .layout = layout, + .flags = 0, + .renderPass = VK_NULL_HANDLE, + .subpass = 0, + }, + extra, alloc, pipeline); ralloc_free(vs_nir); ralloc_free(fs_nir); @@ -171,8 +162,8 @@ create_pipeline(struct radv_device *device, uint32_t samples, } static VkResult -create_color_pipeline(struct radv_device *device, uint32_t samples, uint32_t frag_output, - VkFormat format, VkPipeline *pipeline) +create_color_pipeline(struct radv_device *device, uint32_t samples, uint32_t frag_output, VkFormat format, + VkPipeline *pipeline) { struct nir_shader *vs_nir; struct nir_shader *fs_nir; @@ -205,8 +196,8 @@ create_color_pipeline(struct radv_device *device, uint32_t samples, uint32_t fra VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = {0}; blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState){ .blendEnable = false, - .colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT | - VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT, + .colorWriteMask = + VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT, }; const VkPipelineColorBlendStateCreateInfo cb_state = { @@ -214,9 +205,9 @@ create_color_pipeline(struct radv_device *device, uint32_t samples, uint32_t fra .logicOpEnable = false, .attachmentCount = MAX_RTS, .pAttachments = blend_attachment_state, - .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f }}; + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}}; - VkFormat att_formats[MAX_RTS] = { 0 }; + VkFormat att_formats[MAX_RTS] = {0}; att_formats[frag_output] = format; const 
VkPipelineRenderingCreateInfo rendering_create_info = { @@ -228,10 +219,8 @@ create_color_pipeline(struct radv_device *device, uint32_t samples, uint32_t fra struct radv_graphics_pipeline_create_info extra = { .use_rectlist = true, }; - result = - create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state, - &rendering_create_info, device->meta_state.clear_color_p_layout, - &extra, &device->meta_state.alloc, pipeline); + result = create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state, &rendering_create_info, + device->meta_state.clear_color_p_layout, &extra, &device->meta_state.alloc, pipeline); mtx_unlock(&device->meta_state.mtx); return result; @@ -242,12 +231,10 @@ finish_meta_clear_htile_mask_state(struct radv_device *device) { struct radv_meta_state *state = &device->meta_state; - radv_DestroyPipeline(radv_device_to_handle(device), state->clear_htile_mask_pipeline, - &state->alloc); - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_htile_mask_p_layout, - &state->alloc); - device->vk.dispatch_table.DestroyDescriptorSetLayout( - radv_device_to_handle(device), state->clear_htile_mask_ds_layout, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->clear_htile_mask_pipeline, &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_htile_mask_p_layout, &state->alloc); + device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), + state->clear_htile_mask_ds_layout, &state->alloc); } static void @@ -256,13 +243,11 @@ finish_meta_clear_dcc_comp_to_single_state(struct radv_device *device) struct radv_meta_state *state = &device->meta_state; for (uint32_t i = 0; i < 2; i++) { - radv_DestroyPipeline(radv_device_to_handle(device), - state->clear_dcc_comp_to_single_pipeline[i], &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->clear_dcc_comp_to_single_pipeline[i], &state->alloc); } - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_dcc_comp_to_single_p_layout, - &state->alloc); - device->vk.dispatch_table.DestroyDescriptorSetLayout( - radv_device_to_handle(device), state->clear_dcc_comp_to_single_ds_layout, &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_dcc_comp_to_single_p_layout, &state->alloc); + device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), + state->clear_dcc_comp_to_single_ds_layout, &state->alloc); } void @@ -273,45 +258,38 @@ radv_device_finish_meta_clear_state(struct radv_device *device) for (uint32_t i = 0; i < ARRAY_SIZE(state->color_clear); ++i) { for (uint32_t j = 0; j < ARRAY_SIZE(state->color_clear[0]); ++j) { for (uint32_t k = 0; k < ARRAY_SIZE(state->color_clear[i][j].color_pipelines); ++k) { - radv_DestroyPipeline(radv_device_to_handle(device), - state->color_clear[i][j].color_pipelines[k], &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->color_clear[i][j].color_pipelines[k], + &state->alloc); } } } for (uint32_t i = 0; i < ARRAY_SIZE(state->ds_clear); ++i) { for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) { - radv_DestroyPipeline(radv_device_to_handle(device), - state->ds_clear[i].depth_only_pipeline[j], &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->ds_clear[i].stencil_only_pipeline[j], &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->ds_clear[i].depthstencil_pipeline[j], &state->alloc); - - 
radv_DestroyPipeline(radv_device_to_handle(device), - state->ds_clear[i].depth_only_unrestricted_pipeline[j], + radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].depth_only_pipeline[j], &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].stencil_only_pipeline[j], &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->ds_clear[i].stencil_only_unrestricted_pipeline[j], + radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].depthstencil_pipeline[j], &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->ds_clear[i].depthstencil_unrestricted_pipeline[j], + + radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].depth_only_unrestricted_pipeline[j], &state->alloc); - } - } - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_color_p_layout, + radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].stencil_only_unrestricted_pipeline[j], &state->alloc); - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_depth_p_layout, + radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].depthstencil_unrestricted_pipeline[j], &state->alloc); - radv_DestroyPipelineLayout(radv_device_to_handle(device), - state->clear_depth_unrestricted_p_layout, &state->alloc); + } + } + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_color_p_layout, &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_depth_p_layout, &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_depth_unrestricted_p_layout, &state->alloc); finish_meta_clear_htile_mask_state(device); finish_meta_clear_dcc_comp_to_single_state(device); } static void -emit_color_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, - const VkClearRect *clear_rect, uint32_t view_mask) +emit_color_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, const VkClearRect *clear_rect, + uint32_t view_mask) { struct radv_device *device = cmd_buffer->device; const struct radv_rendering_state *render = &cmd_buffer->state.render; @@ -324,8 +302,7 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *cl assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); assert(clear_att->colorAttachment < render->color_att_count); - const struct radv_attachment *color_att = - &render->color_att[clear_att->colorAttachment]; + const struct radv_attachment *color_att = &render->color_att[clear_att->colorAttachment]; /* When a framebuffer is bound to the current command buffer, get the * number of samples from it. 
Otherwise, get the number of samples from @@ -345,27 +322,24 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *cl fs_key = radv_format_meta_fs_key(device, format); assert(fs_key != -1); - if (device->meta_state.color_clear[samples_log2][clear_att->colorAttachment] - .color_pipelines[fs_key] == VK_NULL_HANDLE) { + if (device->meta_state.color_clear[samples_log2][clear_att->colorAttachment].color_pipelines[fs_key] == + VK_NULL_HANDLE) { VkResult ret = create_color_pipeline( device, samples, clear_att->colorAttachment, radv_fs_key_format_exemplars[fs_key], - &device->meta_state.color_clear[samples_log2][clear_att->colorAttachment] - .color_pipelines[fs_key]); + &device->meta_state.color_clear[samples_log2][clear_att->colorAttachment].color_pipelines[fs_key]); if (ret != VK_SUCCESS) { vk_command_buffer_set_error(&cmd_buffer->vk, ret); return; } } - pipeline = device->meta_state.color_clear[samples_log2][clear_att->colorAttachment] - .color_pipelines[fs_key]; + pipeline = device->meta_state.color_clear[samples_log2][clear_att->colorAttachment].color_pipelines[fs_key]; assert(samples_log2 < ARRAY_SIZE(device->meta_state.color_clear)); assert(pipeline); - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.clear_color_p_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, - 16, &clear_value); + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.clear_color_p_layout, + VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16, &clear_value); radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); @@ -380,36 +354,33 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *cl radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect); if (view_mask) { - u_foreach_bit(i, view_mask) radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i); + u_foreach_bit (i, view_mask) + radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i); } else { radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer); } } static void -build_depthstencil_shader(struct radv_device *dev, struct nir_shader **out_vs, - struct nir_shader **out_fs, bool unrestricted) +build_depthstencil_shader(struct radv_device *dev, struct nir_shader **out_vs, struct nir_shader **out_fs, + bool unrestricted) { nir_builder vs_b = radv_meta_init_shader( - dev, MESA_SHADER_VERTEX, - unrestricted ? "meta_clear_depthstencil_unrestricted_vs" : "meta_clear_depthstencil_vs"); - nir_builder fs_b = radv_meta_init_shader( - dev, MESA_SHADER_FRAGMENT, - unrestricted ? "meta_clear_depthstencil_unrestricted_fs" : "meta_clear_depthstencil_fs"); + dev, MESA_SHADER_VERTEX, unrestricted ? "meta_clear_depthstencil_unrestricted_vs" : "meta_clear_depthstencil_vs"); + nir_builder fs_b = + radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, + unrestricted ? 
"meta_clear_depthstencil_unrestricted_fs" : "meta_clear_depthstencil_fs"); const struct glsl_type *position_out_type = glsl_vec4_type(); - nir_variable *vs_out_pos = - nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type, "gl_Position"); + nir_variable *vs_out_pos = nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type, "gl_Position"); vs_out_pos->data.location = VARYING_SLOT_POS; nir_ssa_def *z; if (unrestricted) { - nir_ssa_def *in_color_load = - nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range = 4); + nir_ssa_def *in_color_load = nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range = 4); - nir_variable *fs_out_depth = - nir_variable_create(fs_b.shader, nir_var_shader_out, glsl_int_type(), "f_depth"); + nir_variable *fs_out_depth = nir_variable_create(fs_b.shader, nir_var_shader_out, glsl_int_type(), "f_depth"); fs_out_depth->data.location = FRAG_RESULT_DEPTH; nir_store_var(&fs_b, fs_out_depth, in_color_load, 0x1); @@ -422,8 +393,7 @@ build_depthstencil_shader(struct radv_device *dev, struct nir_shader **out_vs, nir_store_var(&vs_b, vs_out_pos, outvec, 0xf); const struct glsl_type *layer_type = glsl_int_type(); - nir_variable *vs_out_layer = - nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer"); + nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer"); vs_out_layer->data.location = VARYING_SLOT_LAYER; vs_out_layer->data.interpolation = INTERP_MODE_FLAT; nir_ssa_def *inst_id = nir_load_instance_id(&vs_b); @@ -437,8 +407,8 @@ build_depthstencil_shader(struct radv_device *dev, struct nir_shader **out_vs, } static VkResult -create_depthstencil_pipeline(struct radv_device *device, VkImageAspectFlags aspects, - uint32_t samples, int index, bool unrestricted, VkPipeline *pipeline) +create_depthstencil_pipeline(struct radv_device *device, VkImageAspectFlags aspects, uint32_t samples, int index, + bool unrestricted, VkPipeline *pipeline) { struct nir_shader *vs_nir, *fs_nir; VkResult result; @@ -481,15 +451,13 @@ create_depthstencil_pipeline(struct radv_device *device, VkImageAspectFlags aspe .logicOpEnable = false, .attachmentCount = 0, .pAttachments = NULL, - .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f }, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, }; const VkPipelineRenderingCreateInfo rendering_create_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO, - .depthAttachmentFormat = - (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? VK_FORMAT_D32_SFLOAT : VK_FORMAT_UNDEFINED, - .stencilAttachmentFormat = - (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? VK_FORMAT_S8_UINT : VK_FORMAT_UNDEFINED, + .depthAttachmentFormat = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? VK_FORMAT_D32_SFLOAT : VK_FORMAT_UNDEFINED, + .stencilAttachmentFormat = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? VK_FORMAT_S8_UINT : VK_FORMAT_UNDEFINED, }; struct radv_graphics_pipeline_create_info extra = { @@ -502,57 +470,49 @@ create_depthstencil_pipeline(struct radv_device *device, VkImageAspectFlags aspe if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { extra.db_stencil_clear = index == DEPTH_CLEAR_SLOW ? 
false : true; } - result = - create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state, - &rendering_create_info, device->meta_state.clear_depth_p_layout, &extra, - &device->meta_state.alloc, pipeline); + result = create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state, &rendering_create_info, + device->meta_state.clear_depth_p_layout, &extra, &device->meta_state.alloc, pipeline); mtx_unlock(&device->meta_state.mtx); return result; } -static bool -radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview, - VkImageLayout image_layout, - VkImageAspectFlags aspects, const VkClearRect *clear_rect, - const VkClearDepthStencilValue clear_value, uint32_t view_mask); +static bool radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview, + VkImageLayout image_layout, VkImageAspectFlags aspects, + const VkClearRect *clear_rect, const VkClearDepthStencilValue clear_value, + uint32_t view_mask); static VkPipeline pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_state *meta_state, - const struct radv_image_view *iview, int samples_log2, - VkImageAspectFlags aspects, VkImageLayout layout, - const VkClearRect *clear_rect, VkClearDepthStencilValue clear_value, + const struct radv_image_view *iview, int samples_log2, VkImageAspectFlags aspects, + VkImageLayout layout, const VkClearRect *clear_rect, VkClearDepthStencilValue clear_value, uint32_t view_mask) { - bool fast = radv_can_fast_clear_depth(cmd_buffer, iview, layout, aspects, clear_rect, - clear_value, view_mask); + bool fast = radv_can_fast_clear_depth(cmd_buffer, iview, layout, aspects, clear_rect, clear_value, view_mask); bool unrestricted = cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted; int index = fast ? DEPTH_CLEAR_FAST : DEPTH_CLEAR_SLOW; VkPipeline *pipeline; switch (aspects) { case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: - pipeline = unrestricted - ? &meta_state->ds_clear[samples_log2].depthstencil_unrestricted_pipeline[index] - : &meta_state->ds_clear[samples_log2].depthstencil_pipeline[index]; + pipeline = unrestricted ? &meta_state->ds_clear[samples_log2].depthstencil_unrestricted_pipeline[index] + : &meta_state->ds_clear[samples_log2].depthstencil_pipeline[index]; break; case VK_IMAGE_ASPECT_DEPTH_BIT: - pipeline = unrestricted - ? &meta_state->ds_clear[samples_log2].depth_only_unrestricted_pipeline[index] - : &meta_state->ds_clear[samples_log2].depth_only_pipeline[index]; + pipeline = unrestricted ? &meta_state->ds_clear[samples_log2].depth_only_unrestricted_pipeline[index] + : &meta_state->ds_clear[samples_log2].depth_only_pipeline[index]; break; case VK_IMAGE_ASPECT_STENCIL_BIT: - pipeline = unrestricted - ? &meta_state->ds_clear[samples_log2].stencil_only_unrestricted_pipeline[index] - : &meta_state->ds_clear[samples_log2].stencil_only_pipeline[index]; + pipeline = unrestricted ? 
&meta_state->ds_clear[samples_log2].stencil_only_unrestricted_pipeline[index] + : &meta_state->ds_clear[samples_log2].stencil_only_pipeline[index]; break; default: unreachable("expected depth or stencil aspect"); } if (*pipeline == VK_NULL_HANDLE) { - VkResult ret = create_depthstencil_pipeline( - cmd_buffer->device, aspects, 1u << samples_log2, index, unrestricted, pipeline); + VkResult ret = + create_depthstencil_pipeline(cmd_buffer->device, aspects, 1u << samples_log2, index, unrestricted, pipeline); if (ret != VK_SUCCESS) { vk_command_buffer_set_error(&cmd_buffer->vk, ret); return VK_NULL_HANDLE; @@ -592,13 +552,11 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachm clear_value.depth = 1.0f; if (cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted) { - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.clear_depth_unrestricted_p_layout, + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.clear_depth_unrestricted_p_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4, &clear_value.depth); } else { - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.clear_depth_p_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, - 4, &clear_value.depth); + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.clear_depth_p_layout, + VK_SHADER_STAGE_VERTEX_BIT, 0, 4, &clear_value.depth); } uint32_t prev_reference = cmd_buffer->state.dynamic.vk.ds.stencil.front.reference; @@ -606,16 +564,14 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachm radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, clear_value.stencil); } - VkPipeline pipeline = - pick_depthstencil_pipeline(cmd_buffer, meta_state, iview, samples_log2, aspects, - render->ds_att.layout, clear_rect, clear_value, view_mask); + VkPipeline pipeline = pick_depthstencil_pipeline(cmd_buffer, meta_state, iview, samples_log2, aspects, + render->ds_att.layout, clear_rect, clear_value, view_mask); if (!pipeline) return; radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - if (radv_can_fast_clear_depth(cmd_buffer, iview, render->ds_att.layout, aspects, - clear_rect, clear_value, view_mask)) + if (radv_can_fast_clear_depth(cmd_buffer, iview, render->ds_att.layout, aspects, clear_rect, clear_value, view_mask)) radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects); radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, @@ -629,7 +585,8 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachm radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect); if (view_mask) { - u_foreach_bit(i, view_mask) radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i); + u_foreach_bit (i, view_mask) + radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i); } else { radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer); } @@ -640,9 +597,8 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachm } static uint32_t -clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, - struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size, uint32_t htile_value, - uint32_t htile_mask) +clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, struct radeon_winsys_bo *bo, + uint64_t offset, uint64_t size, uint32_t htile_value, uint32_t htile_mask) { struct radv_device *device = cmd_buffer->device; struct 
radv_meta_state *state = &device->meta_state; @@ -650,9 +606,8 @@ clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im struct radv_meta_saved_state saved_state; struct radv_buffer dst_buffer; - radv_meta_save( - &saved_state, cmd_buffer, - RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS); + radv_meta_save(&saved_state, cmd_buffer, + RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS); radv_buffer_init(&dst_buffer, device, bo, size, offset); @@ -661,16 +616,15 @@ clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im radv_meta_push_descriptor_set( cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->clear_htile_mask_p_layout, 0, /* set */ - 1, /* descriptorWriteCount */ + 1, /* descriptorWriteCount */ (VkWriteDescriptorSet[]){ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .dstBinding = 0, .dstArrayElement = 0, .descriptorCount = 1, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer), - .offset = 0, - .range = size}}}); + .pBufferInfo = + &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer), .offset = 0, .range = size}}}); const unsigned constants[2] = { htile_value & htile_mask, @@ -686,8 +640,7 @@ clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im radv_meta_restore(&saved_state, cmd_buffer); - return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | - radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); + return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); } static uint32_t @@ -710,9 +663,7 @@ radv_get_htile_fast_clear_value(const struct radv_device *device, const struct r * +---------+---------+-------+ * | Max Z | Min Z | ZMask | */ - htile_value = (((zmax & 0x3fff) << 18) | - ((zmin & 0x3fff) << 4) | - ((zmask & 0xf) << 0)); + htile_value = (((zmax & 0x3fff) << 18) | ((zmin & 0x3fff) << 4) | ((zmask & 0xf) << 0)); } else { /* Z and stencil: @@ -733,18 +684,14 @@ radv_get_htile_fast_clear_value(const struct radv_device *device, const struct r if (radv_image_has_vrs_htile(device, image)) sresults = 0x3; - htile_value = (((zrange & 0xfffff) << 12) | - ((smem & 0x3) << 8) | - ((sresults & 0xf) << 4) | - ((zmask & 0xf) << 0)); + htile_value = (((zrange & 0xfffff) << 12) | ((smem & 0x3) << 8) | ((sresults & 0xf) << 4) | ((zmask & 0xf) << 0)); } return htile_value; } static uint32_t -radv_get_htile_mask(const struct radv_device *device, const struct radv_image *image, - VkImageAspectFlags aspects) +radv_get_htile_mask(const struct radv_device *device, const struct radv_image *image, VkImageAspectFlags aspects) { uint32_t mask = 0; @@ -775,17 +722,14 @@ radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value) static bool radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview, - VkImageLayout image_layout, - VkImageAspectFlags aspects, const VkClearRect *clear_rect, + VkImageLayout image_layout, VkImageAspectFlags aspects, const VkClearRect *clear_rect, const VkClearDepthStencilValue clear_value, uint32_t view_mask) { if (!iview || !iview->support_fast_clear) return false; - if (!radv_layout_is_htile_compressed( - cmd_buffer->device, iview->image, image_layout, - radv_image_queue_family_mask(iview->image, cmd_buffer->qf, - cmd_buffer->qf))) + if (!radv_layout_is_htile_compressed(cmd_buffer->device, iview->image, 
image_layout, + radv_image_queue_family_mask(iview->image, cmd_buffer->qf, cmd_buffer->qf))) return false; if (clear_rect->rect.offset.x || clear_rect->rect.offset.y || @@ -793,8 +737,7 @@ radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_ clear_rect->rect.extent.height != iview->image->vk.extent.height) return false; - if (view_mask && (iview->image->vk.array_layers >= 32 || - (1u << iview->image->vk.array_layers) - 1u != view_mask)) + if (view_mask && (iview->image->vk.array_layers >= 32 || (1u << iview->image->vk.array_layers) - 1u != view_mask)) return false; if (!view_mask && clear_rect->baseArrayLayer != 0) return false; @@ -802,14 +745,12 @@ radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_ return false; if (cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted && - (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && - (clear_value.depth < 0.0 || clear_value.depth > 1.0)) + (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && (clear_value.depth < 0.0 || clear_value.depth > 1.0)) return false; if (radv_image_is_tc_compat_htile(iview->image) && (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && !radv_is_fast_clear_depth_allowed(clear_value)) || - ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && - !radv_is_fast_clear_stencil_allowed(clear_value)))) + ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && !radv_is_fast_clear_stencil_allowed(clear_value)))) return false; if (iview->image->vk.mip_levels > 1) { @@ -836,10 +777,8 @@ radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_imag if (pre_flush) { enum radv_cmd_flush_bits bits = - radv_src_access_flush(cmd_buffer, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, - iview->image) | - radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT | - VK_ACCESS_2_SHADER_READ_BIT, iview->image); + radv_src_access_flush(cmd_buffer, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, iview->image) | + radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT | VK_ACCESS_2_SHADER_READ_BIT, iview->image); cmd_buffer->state.flush_bits |= bits & ~*pre_flush; *pre_flush |= cmd_buffer->state.flush_bits; } @@ -903,20 +842,19 @@ init_meta_clear_htile_mask_state(struct radv_device *device) VkResult result; nir_shader *cs = build_clear_htile_mask_shader(device); - VkDescriptorSetLayoutCreateInfo ds_layout_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 1, - .pBindings = (VkDescriptorSetLayoutBinding[]){ - {.binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - }}; - - result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info, - &state->alloc, &state->clear_htile_mask_ds_layout); + VkDescriptorSetLayoutCreateInfo ds_layout_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]){ + {.binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + }}; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info, &state->alloc, + &state->clear_htile_mask_ds_layout); if (result != VK_SUCCESS) goto fail; @@ -953,9 +891,8 @@ init_meta_clear_htile_mask_state(struct 
radv_device *device) .layout = state->clear_htile_mask_p_layout, }; - result = - radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, &pipeline_info, - NULL, &state->clear_htile_mask_pipeline); + result = radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, &pipeline_info, NULL, + &state->clear_htile_mask_pipeline); fail: ralloc_free(cs); @@ -971,9 +908,8 @@ build_clear_dcc_comp_to_single_shader(struct radv_device *dev, bool is_msaa) enum glsl_sampler_dim dim = is_msaa ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D; const struct glsl_type *img_type = glsl_image_type(dim, true, GLSL_TYPE_FLOAT); - nir_builder b = - radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_dcc_comp_to_single-%s", - is_msaa ? "multisampled" : "singlesampled"); + nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_dcc_comp_to_single-%s", + is_msaa ? "multisampled" : "singlesampled"); b.shader->info.workgroup_size[0] = 8; b.shader->info.workgroup_size[1] = 8; @@ -985,10 +921,8 @@ build_clear_dcc_comp_to_single_shader(struct radv_device *dev, bool is_msaa) /* Compute the coordinates. */ nir_ssa_def *coord = nir_trim_vector(&b, global_id, 2); coord = nir_imul(&b, coord, dcc_block_size); - coord = nir_vec4(&b, nir_channel(&b, coord, 0), - nir_channel(&b, coord, 1), - nir_channel(&b, global_id, 2), - nir_ssa_undef(&b, 1, 32)); + coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), nir_channel(&b, global_id, 2), + nir_ssa_undef(&b, 1, 32)); nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img"); output_img->data.descriptor_set = 0; @@ -997,15 +931,12 @@ build_clear_dcc_comp_to_single_shader(struct radv_device *dev, bool is_msaa) /* Load the clear color values. */ nir_ssa_def *clear_values = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 8); - nir_ssa_def *data = nir_vec4(&b, nir_channel(&b, clear_values, 0), - nir_channel(&b, clear_values, 1), - nir_channel(&b, clear_values, 1), - nir_channel(&b, clear_values, 1)); + nir_ssa_def *data = nir_vec4(&b, nir_channel(&b, clear_values, 0), nir_channel(&b, clear_values, 1), + nir_channel(&b, clear_values, 1), nir_channel(&b, clear_values, 1)); /* Store the clear color values. */ nir_ssa_def *sample_id = is_msaa ? 
nir_imm_int(&b, 0) : nir_ssa_undef(&b, 1, 32); - nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, - sample_id, data, nir_imm_int(&b, 0), + nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, sample_id, data, nir_imm_int(&b, 0), .image_dim = dim, .image_array = true); return b.shader; @@ -1033,8 +964,7 @@ create_dcc_comp_to_single_pipeline(struct radv_device *device, bool is_msaa, VkP .layout = state->clear_dcc_comp_to_single_p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, - &pipeline_info, NULL, pipeline); + result = radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, &pipeline_info, NULL, pipeline); ralloc_free(cs); return result; @@ -1046,20 +976,19 @@ init_meta_clear_dcc_comp_to_single_state(struct radv_device *device) struct radv_meta_state *state = &device->meta_state; VkResult result; - VkDescriptorSetLayoutCreateInfo ds_layout_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 1, - .pBindings = (VkDescriptorSetLayoutBinding[]){ - {.binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - }}; - - result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info, - &state->alloc, &state->clear_dcc_comp_to_single_ds_layout); + VkDescriptorSetLayoutCreateInfo ds_layout_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]){ + {.binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + }}; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info, &state->alloc, + &state->clear_dcc_comp_to_single_ds_layout); if (result != VK_SUCCESS) goto fail; @@ -1082,8 +1011,7 @@ init_meta_clear_dcc_comp_to_single_state(struct radv_device *device) goto fail; for (uint32_t i = 0; i < 2; i++) { - result = create_dcc_comp_to_single_pipeline(device, !!i, - &state->clear_dcc_comp_to_single_pipeline[i]); + result = create_dcc_comp_to_single_pipeline(device, !!i, &state->clear_dcc_comp_to_single_pipeline[i]); if (result != VK_SUCCESS) goto fail; } @@ -1105,8 +1033,7 @@ radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand) .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16}, }; - res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_color_create_info, - &device->meta_state.alloc, + res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_color_create_info, &device->meta_state.alloc, &device->meta_state.clear_color_p_layout); if (res != VK_SUCCESS) return res; @@ -1118,8 +1045,7 @@ radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand) .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_VERTEX_BIT, 0, 4}, }; - res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_depth_create_info, - &device->meta_state.alloc, + res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_depth_create_info, &device->meta_state.alloc, &device->meta_state.clear_depth_p_layout); if (res != VK_SUCCESS) return res; @@ -1131,9 +1057,8 @@ 
radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand) .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4}, }; - res = radv_CreatePipelineLayout(radv_device_to_handle(device), - &pl_depth_unrestricted_create_info, &device->meta_state.alloc, - &device->meta_state.clear_depth_unrestricted_p_layout); + res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_depth_unrestricted_create_info, + &device->meta_state.alloc, &device->meta_state.clear_depth_unrestricted_p_layout); if (res != VK_SUCCESS) return res; @@ -1157,8 +1082,7 @@ radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand) unsigned fs_key = radv_format_meta_fs_key(device, format); assert(!state->color_clear[i][0].color_pipelines[fs_key]); - res = create_color_pipeline(device, samples, 0, format, - &state->color_clear[i][0].color_pipelines[fs_key]); + res = create_color_pipeline(device, samples, 0, format, &state->color_clear[i][0].color_pipelines[fs_key]); if (res != VK_SUCCESS) return res; } @@ -1177,9 +1101,8 @@ radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand) if (res != VK_SUCCESS) return res; - res = create_depthstencil_pipeline( - device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, false, - &state->ds_clear[i].depthstencil_pipeline[j]); + res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, + false, &state->ds_clear[i].depthstencil_pipeline[j]); if (res != VK_SUCCESS) return res; @@ -1188,15 +1111,13 @@ radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand) if (res != VK_SUCCESS) return res; - res = - create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, true, - &state->ds_clear[i].stencil_only_unrestricted_pipeline[j]); + res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, true, + &state->ds_clear[i].stencil_only_unrestricted_pipeline[j]); if (res != VK_SUCCESS) return res; - res = create_depthstencil_pipeline( - device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, true, - &state->ds_clear[i].depthstencil_unrestricted_pipeline[j]); + res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, + true, &state->ds_clear[i].depthstencil_unrestricted_pipeline[j]); if (res != VK_SUCCESS) return res; } @@ -1221,8 +1142,8 @@ radv_get_cmask_fast_clear_value(const struct radv_image *image) } uint32_t -radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, - const VkImageSubresourceRange *range, uint32_t value) +radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range, + uint32_t value) { uint64_t offset = image->bindings[0].offset + image->planes[0].surface.cmask_offset; uint64_t size; @@ -1237,13 +1158,13 @@ radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, size = slice_size * vk_image_subresource_layer_count(&image->vk, range); } - return radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, - radv_buffer_get_va(image->bindings[0].bo) + offset, size, value); + return radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, radv_buffer_get_va(image->bindings[0].bo) + offset, + size, value); } uint32_t -radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, - const VkImageSubresourceRange *range, uint32_t value) +radv_clear_fmask(struct 
radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range, + uint32_t value) { uint64_t offset = image->bindings[0].offset + image->planes[0].surface.fmask_offset; unsigned slice_size = image->planes[0].surface.fmask_slice_size; @@ -1255,13 +1176,13 @@ radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, offset += slice_size * range->baseArrayLayer; size = slice_size * vk_image_subresource_layer_count(&image->vk, range); - return radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, - radv_buffer_get_va(image->bindings[0].bo) + offset, size, value); + return radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, radv_buffer_get_va(image->bindings[0].bo) + offset, + size, value); } uint32_t -radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, - const VkImageSubresourceRange *range, uint32_t value) +radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range, + uint32_t value) { uint32_t level_count = vk_image_subresource_level_count(&image->vk, range); uint32_t layer_count = vk_image_subresource_layer_count(&image->vk, range); @@ -1285,8 +1206,7 @@ radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, assert(level == 0); size = image->planes[0].surface.meta_size; } else { - const struct legacy_surf_dcc_level *dcc_level = - &image->planes[0].surface.u.legacy.color.dcc_level[level]; + const struct legacy_surf_dcc_level *dcc_level = &image->planes[0].surface.u.legacy.color.dcc_level[level]; /* If dcc_fast_clear_size is 0 (which might happens for * mipmaps) the fill buffer operation below is a no-op. @@ -1294,8 +1214,7 @@ radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, * fast clear path fallbacks to slow clears if one * level can't be fast cleared. 
*/ - offset += - dcc_level->dcc_offset + dcc_level->dcc_slice_fast_clear_size * range->baseArrayLayer; + offset += dcc_level->dcc_offset + dcc_level->dcc_slice_fast_clear_size * range->baseArrayLayer; size = dcc_level->dcc_slice_fast_clear_size * vk_image_subresource_layer_count(&image->vk, range); } @@ -1304,18 +1223,15 @@ radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, continue; flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, - radv_buffer_get_va(image->bindings[0].bo) + offset, - size, value); + radv_buffer_get_va(image->bindings[0].bo) + offset, size, value); } return flush_bits; } static uint32_t -radv_clear_dcc_comp_to_single(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - const VkImageSubresourceRange *range, - uint32_t color_values[2]) +radv_clear_dcc_comp_to_single(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, + const VkImageSubresourceRange *range, uint32_t color_values[2]) { struct radv_device *device = cmd_buffer->device; unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk.format); @@ -1345,14 +1261,12 @@ radv_clear_dcc_comp_to_single(struct radv_cmd_buffer *cmd_buffer, unreachable("Unsupported number of bytes per pixel"); } - radv_meta_save( - &saved_state, cmd_buffer, - RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS); + radv_meta_save(&saved_state, cmd_buffer, + RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS); VkPipeline pipeline = device->meta_state.clear_dcc_comp_to_single_pipeline[is_msaa]; - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); for (uint32_t l = 0; l < vk_image_subresource_level_count(&image->vk, range); l++) { uint32_t width, height; @@ -1364,43 +1278,37 @@ radv_clear_dcc_comp_to_single(struct radv_cmd_buffer *cmd_buffer, width = radv_minify(image->vk.extent.width, range->baseMipLevel + l); height = radv_minify(image->vk.extent.height, range->baseMipLevel + l); - radv_image_view_init( - &iview, cmd_buffer->device, - &(VkImageViewCreateInfo){ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = radv_image_to_handle(image), - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = format, - .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = range->baseMipLevel + l, - .levelCount = 1, - .baseArrayLayer = range->baseArrayLayer, - .layerCount = layer_count}, - }, - 0, &(struct radv_image_view_extra_create_info){.disable_compression = true}); - - radv_meta_push_descriptor_set( - cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - device->meta_state.clear_dcc_comp_to_single_p_layout, 0, - 1, - (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .pImageInfo = - (VkDescriptorImageInfo[]){ - { - .sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(&iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - }}}); - - unsigned dcc_width = - DIV_ROUND_UP(width, image->planes[0].surface.u.gfx9.color.dcc_block_width); - unsigned dcc_height = - DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height); + radv_image_view_init(&iview, cmd_buffer->device, + &(VkImageViewCreateInfo){ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = 
radv_image_to_handle(image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = format, + .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = range->baseMipLevel + l, + .levelCount = 1, + .baseArrayLayer = range->baseArrayLayer, + .layerCount = layer_count}, + }, + 0, &(struct radv_image_view_extra_create_info){.disable_compression = true}); + + radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + device->meta_state.clear_dcc_comp_to_single_p_layout, 0, 1, + (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]){ + { + .sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(&iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + }}}); + + unsigned dcc_width = DIV_ROUND_UP(width, image->planes[0].surface.u.gfx9.color.dcc_block_width); + unsigned dcc_height = DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height); const unsigned constants[4] = { image->planes[0].surface.u.gfx9.color.dcc_block_width, @@ -1409,8 +1317,7 @@ radv_clear_dcc_comp_to_single(struct radv_cmd_buffer *cmd_buffer, color_values[1], }; - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.clear_dcc_comp_to_single_p_layout, + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.clear_dcc_comp_to_single_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, constants); radv_unaligned_dispatch(cmd_buffer, dcc_width, dcc_height, layer_count); @@ -1420,8 +1327,7 @@ radv_clear_dcc_comp_to_single(struct radv_cmd_buffer *cmd_buffer, radv_meta_restore(&saved_state, cmd_buffer); - return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | - radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); + return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); } uint32_t @@ -1451,12 +1357,10 @@ radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im if (htile_mask == UINT_MAX) { /* Clear the whole HTILE buffer. */ flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, - radv_buffer_get_va(image->bindings[0].bo) + offset, - size, value); + radv_buffer_get_va(image->bindings[0].bo) + offset, size, value); } else { /* Only clear depth or stencil bytes in the HTILE buffer. */ - flush_bits |= - clear_htile_mask(cmd_buffer, image, image->bindings[0].bo, offset, size, value, htile_mask); + flush_bits |= clear_htile_mask(cmd_buffer, image, image->bindings[0].bo, offset, size, value, htile_mask); } } } else { @@ -1468,12 +1372,10 @@ radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im if (htile_mask == UINT_MAX) { /* Clear the whole HTILE buffer. */ flush_bits = radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, - radv_buffer_get_va(image->bindings[0].bo) + offset, - size, value); + radv_buffer_get_va(image->bindings[0].bo) + offset, size, value); } else { /* Only clear depth or stencil bytes in the HTILE buffer. 
*/ - flush_bits = - clear_htile_mask(cmd_buffer, image, image->bindings[0].bo, offset, size, value, htile_mask); + flush_bits = clear_htile_mask(cmd_buffer, image, image->bindings[0].bo, offset, size, value, htile_mask); } } @@ -1525,8 +1427,8 @@ gfx8_get_fast_clear_parameters(struct radv_device *device, const struct radv_ima } const struct util_format_description *desc = vk_format_description(iview->vk.format); - if (iview->vk.format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 || - iview->vk.format == VK_FORMAT_R5G6B5_UNORM_PACK16 || iview->vk.format == VK_FORMAT_B5G6R5_UNORM_PACK16) + if (iview->vk.format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 || iview->vk.format == VK_FORMAT_R5G6B5_UNORM_PACK16 || + iview->vk.format == VK_FORMAT_B5G6R5_UNORM_PACK16) extra_channel = -1; else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) { if (vi_alpha_is_on_msb(device, iview->vk.format)) @@ -1548,8 +1450,7 @@ gfx8_get_fast_clear_parameters(struct radv_device *device, const struct radv_ima values[i] = clear_value->int32[i] != 0; if (clear_value->int32[i] != 0 && MIN2(clear_value->int32[i], max) != max) return; - } else if (desc->channel[i].pure_integer && - desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { + } else if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { /* Use the maximum value for clamping the clear color. */ unsigned max = u_bit_consecutive(0, desc->channel[i].size); @@ -1679,22 +1580,18 @@ gfx11_get_fast_clear_parameters(struct radv_device *device, const struct radv_im return true; } } else if (desc->nr_channels == 4 && desc->channel[0].size == 8) { - if (value.ub[0] == 0x00 && value.ub[1] == 0x00 && - value.ub[2] == 0x00 && value.ub[3] == 0xff) { + if (value.ub[0] == 0x00 && value.ub[1] == 0x00 && value.ub[2] == 0x00 && value.ub[3] == 0xff) { *reset_value = RADV_DCC_GFX11_CLEAR_0001_UNORM; return true; - } else if (value.ub[0] == 0xff && value.ub[1] == 0xff && - value.ub[2] == 0xff && value.ub[3] == 0x00) { + } else if (value.ub[0] == 0xff && value.ub[1] == 0xff && value.ub[2] == 0xff && value.ub[3] == 0x00) { *reset_value = RADV_DCC_GFX11_CLEAR_1110_UNORM; return true; } } else if (desc->nr_channels == 4 && desc->channel[0].size == 16) { - if (value.us[0] == 0x0000 && value.us[1] == 0x0000 && - value.us[2] == 0x0000 && value.us[3] == 0xffff) { + if (value.us[0] == 0x0000 && value.us[1] == 0x0000 && value.us[2] == 0x0000 && value.us[3] == 0xffff) { *reset_value = RADV_DCC_GFX11_CLEAR_0001_UNORM; return true; - } else if (value.us[0] == 0xffff && value.us[1] == 0xffff && - value.us[2] == 0xffff && value.us[3] == 0x0000) { + } else if (value.us[0] == 0xffff && value.us[1] == 0xffff && value.us[2] == 0xffff && value.us[3] == 0x0000) { *reset_value = RADV_DCC_GFX11_CLEAR_1110_UNORM; return true; } @@ -1711,8 +1608,7 @@ gfx11_get_fast_clear_parameters(struct radv_device *device, const struct radv_im static bool radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview, - VkImageLayout image_layout, - const VkClearRect *clear_rect, VkClearColorValue clear_value, + VkImageLayout image_layout, const VkClearRect *clear_rect, VkClearColorValue clear_value, uint32_t view_mask) { uint32_t clear_color[2]; @@ -1720,10 +1616,8 @@ radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_ if (!iview || !iview->support_fast_clear) return false; - if (!radv_layout_can_fast_clear( - cmd_buffer->device, iview->image, iview->vk.base_mip_level, image_layout, - radv_image_queue_family_mask(iview->image, 
cmd_buffer->qf, - cmd_buffer->qf))) + if (!radv_layout_can_fast_clear(cmd_buffer->device, iview->image, iview->vk.base_mip_level, image_layout, + radv_image_queue_family_mask(iview->image, cmd_buffer->qf, cmd_buffer->qf))) return false; if (clear_rect->rect.offset.x || clear_rect->rect.offset.y || @@ -1731,8 +1625,7 @@ radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_ clear_rect->rect.extent.height != iview->image->vk.extent.height) return false; - if (view_mask && (iview->image->vk.array_layers >= 32 || - (1u << iview->image->vk.array_layers) - 1u != view_mask)) + if (view_mask && (iview->image->vk.array_layers >= 32 || (1u << iview->image->vk.array_layers) - 1u != view_mask)) return false; if (!view_mask && clear_rect->baseArrayLayer != 0) return false; @@ -1744,8 +1637,8 @@ radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_ return false; /* Images that support comp-to-single clears don't have clear values. */ - if (!iview->image->support_comp_to_single && - !radv_image_has_clear_value(iview->image) && (clear_color[0] != 0 || clear_color[1] != 0)) + if (!iview->image->support_comp_to_single && !radv_image_has_clear_value(iview->image) && + (clear_color[0] != 0 || clear_color[1] != 0)) return false; if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level)) { @@ -1753,8 +1646,7 @@ radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_ uint32_t reset_value; if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { - if (!gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, - &reset_value)) + if (!gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value)) return false; } else { gfx8_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value, @@ -1823,8 +1715,7 @@ radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_imag bool can_avoid_fast_clear_elim = true; if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { - ASSERTED bool result = - gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value); + ASSERTED bool result = gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value); assert(result); } else { gfx8_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value, @@ -1867,25 +1758,23 @@ radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_imag * The parameters mean that same as those in vkCmdClearAttachments. 
*/ static void -emit_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, - const VkClearRect *clear_rect, enum radv_cmd_flush_bits *pre_flush, - enum radv_cmd_flush_bits *post_flush, uint32_t view_mask) +emit_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, const VkClearRect *clear_rect, + enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush, uint32_t view_mask) { const struct radv_rendering_state *render = &cmd_buffer->state.render; VkImageAspectFlags aspects = clear_att->aspectMask; if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) { assert(clear_att->colorAttachment < render->color_att_count); - const struct radv_attachment *color_att = - &render->color_att[clear_att->colorAttachment]; + const struct radv_attachment *color_att = &render->color_att[clear_att->colorAttachment]; if (color_att->format == VK_FORMAT_UNDEFINED) return; VkClearColorValue clear_value = clear_att->clearValue.color; - if (radv_can_fast_clear_color(cmd_buffer, color_att->iview, color_att->layout, clear_rect, - clear_value, view_mask)) { + if (radv_can_fast_clear_color(cmd_buffer, color_att->iview, color_att->layout, clear_rect, clear_value, + view_mask)) { radv_fast_clear_color(cmd_buffer, color_att->iview, clear_att, pre_flush, post_flush); } else { emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask); @@ -1900,8 +1789,8 @@ emit_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_at assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)); - if (radv_can_fast_clear_depth(cmd_buffer, ds_att->iview, ds_att->layout, aspects, - clear_rect, clear_value, view_mask)) { + if (radv_can_fast_clear_depth(cmd_buffer, ds_att->iview, ds_att->layout, aspects, clear_rect, clear_value, + view_mask)) { radv_fast_clear_depth(cmd_buffer, ds_att->iview, clear_att, pre_flush, post_flush); } else { emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect, view_mask); @@ -1918,13 +1807,11 @@ radv_rendering_needs_clear(const VkRenderingInfo *pRenderingInfo) return true; } - if (pRenderingInfo->pDepthAttachment != NULL && - pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE && + if (pRenderingInfo->pDepthAttachment != NULL && pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE && pRenderingInfo->pDepthAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) return true; - if (pRenderingInfo->pStencilAttachment != NULL && - pRenderingInfo->pStencilAttachment->imageView != VK_NULL_HANDLE && + if (pRenderingInfo->pStencilAttachment != NULL && pRenderingInfo->pStencilAttachment->imageView != VK_NULL_HANDLE && pRenderingInfo->pStencilAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) return true; @@ -1932,10 +1819,8 @@ radv_rendering_needs_clear(const VkRenderingInfo *pRenderingInfo) } static void -radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer, - const VkClearAttachment *clear_att, - enum radv_cmd_flush_bits *pre_flush, - enum radv_cmd_flush_bits *post_flush) +radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, + enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush) { const struct radv_rendering_state *render = &cmd_buffer->state.render; @@ -1958,8 +1843,7 @@ radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer, * @see radv_attachment_state::pending_clear_aspects */ void -radv_cmd_buffer_clear_rendering(struct radv_cmd_buffer *cmd_buffer, - const VkRenderingInfo *pRenderingInfo) 
+radv_cmd_buffer_clear_rendering(struct radv_cmd_buffer *cmd_buffer, const VkRenderingInfo *pRenderingInfo) { const struct radv_rendering_state *render = &cmd_buffer->state.render; struct radv_meta_saved_state saved_state; @@ -1970,9 +1854,8 @@ radv_cmd_buffer_clear_rendering(struct radv_cmd_buffer *cmd_buffer, return; /* Subpass clear should not be affected by conditional rendering. */ - radv_meta_save( - &saved_state, cmd_buffer, - RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING); + radv_meta_save(&saved_state, cmd_buffer, + RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING); assert(render->color_att_count == pRenderingInfo->colorAttachmentCount); for (uint32_t i = 0; i < render->color_att_count; i++) { @@ -1990,14 +1873,12 @@ radv_cmd_buffer_clear_rendering(struct radv_cmd_buffer *cmd_buffer, } if (render->ds_att.iview != NULL) { - VkClearAttachment clear_att = { .aspectMask = 0 }; + VkClearAttachment clear_att = {.aspectMask = 0}; - if (pRenderingInfo->pDepthAttachment != NULL && - pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE && + if (pRenderingInfo->pDepthAttachment != NULL && pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE && pRenderingInfo->pDepthAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) { clear_att.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT; - clear_att.clearValue.depthStencil.depth = - pRenderingInfo->pDepthAttachment->clearValue.depthStencil.depth; + clear_att.clearValue.depthStencil.depth = pRenderingInfo->pDepthAttachment->clearValue.depthStencil.depth; } if (pRenderingInfo->pStencilAttachment != NULL && @@ -2018,9 +1899,8 @@ radv_cmd_buffer_clear_rendering(struct radv_cmd_buffer *cmd_buffer, } static void -radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, - VkImageLayout image_layout, const VkImageSubresourceRange *range, - VkFormat format, int level, unsigned layer_count, +radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout image_layout, + const VkImageSubresourceRange *range, VkFormat format, int level, unsigned layer_count, const VkClearValue *clear_val) { struct radv_image_view iview; @@ -2067,10 +1947,11 @@ radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *im VkRenderingInfo rendering_info = { .sType = VK_STRUCTURE_TYPE_RENDERING_INFO, - .renderArea = { - .offset = { 0, 0 }, - .extent = { width, height }, - }, + .renderArea = + { + .offset = {0, 0}, + .extent = {width, height}, + }, .layerCount = layer_count, }; @@ -2097,8 +1978,7 @@ radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *im */ static bool radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkFormat format, - VkImageLayout image_layout, - const VkImageSubresourceRange *range, const VkClearValue *clear_val) + VkImageLayout image_layout, const VkImageSubresourceRange *range, const VkClearValue *clear_val) { struct radv_image_view iview; bool fast_cleared = false; @@ -2141,14 +2021,12 @@ radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer, struct radv_image *ima }; if (vk_format_is_color(format)) { - if (radv_can_fast_clear_color(cmd_buffer, &iview, image_layout, &clear_rect, - clear_att.clearValue.color, 0)) { + if (radv_can_fast_clear_color(cmd_buffer, &iview, image_layout, &clear_rect, clear_att.clearValue.color, 0)) { radv_fast_clear_color(cmd_buffer, &iview, &clear_att, NULL, NULL); fast_cleared = true; } } else { - 
if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout, - range->aspectMask, &clear_rect, + if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout, range->aspectMask, &clear_rect, clear_att.clearValue.depthStencil, 0)) { radv_fast_clear_depth(cmd_buffer, &iview, &clear_att, NULL, NULL); fast_cleared = true; @@ -2160,9 +2038,9 @@ radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer, struct radv_image *ima } static void -radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, - VkImageLayout image_layout, const VkClearValue *clear_value, - uint32_t range_count, const VkImageSubresourceRange *ranges, bool cs) +radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout image_layout, + const VkClearValue *clear_value, uint32_t range_count, const VkImageSubresourceRange *ranges, + bool cs) { VkFormat format = image->vk.format; VkClearValue internal_clear_value; @@ -2177,20 +2055,17 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *imag if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) { bool blendable; if (cs ? !radv_is_storage_image_format_supported(cmd_buffer->device->physical_device, format) - : !radv_is_colorbuffer_format_supported(cmd_buffer->device->physical_device, format, - &blendable)) { + : !radv_is_colorbuffer_format_supported(cmd_buffer->device->physical_device, format, &blendable)) { format = VK_FORMAT_R32_UINT; internal_clear_value.color.uint32[0] = float3_to_rgb9e5(clear_value->color.float32); - uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf, - cmd_buffer->qf); + uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf); for (uint32_t r = 0; r < range_count; r++) { const VkImageSubresourceRange *range = &ranges[r]; /* Don't use compressed image stores because they will use an incompatible format. */ - if (radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel, - image_layout, queue_mask)) { + if (radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel, image_layout, queue_mask)) { disable_compression = cs; break; } @@ -2212,8 +2087,7 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *imag /* Try to perform a fast clear first, otherwise fallback to * the legacy path. */ - if (!cs && radv_fast_clear_range(cmd_buffer, image, format, image_layout, range, - &internal_clear_value)) { + if (!cs && radv_fast_clear_range(cmd_buffer, image, format, image_layout, range, &internal_clear_value)) { continue; } @@ -2252,16 +2126,14 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *imag VKAPI_ATTR void VKAPI_CALL radv_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout, - const VkClearColorValue *pColor, uint32_t rangeCount, - const VkImageSubresourceRange *pRanges) + const VkClearColorValue *pColor, uint32_t rangeCount, const VkImageSubresourceRange *pRanges) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_image, image, image_h); struct radv_meta_saved_state saved_state; bool cs; - cs = cmd_buffer->qf == RADV_QUEUE_COMPUTE || - !radv_image_is_renderable(cmd_buffer->device, image); + cs = cmd_buffer->qf == RADV_QUEUE_COMPUTE || !radv_image_is_renderable(cmd_buffer->device, image); /* Clear commands (except vkCmdClearAttachments) should not be affected by conditional rendering. 
*/ @@ -2273,15 +2145,13 @@ radv_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageL radv_meta_save(&saved_state, cmd_buffer, save_flags); - radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pColor, rangeCount, - pRanges, cs); + radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pColor, rangeCount, pRanges, cs); radv_meta_restore(&saved_state, cmd_buffer); } VKAPI_ATTR void VKAPI_CALL -radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h, - VkImageLayout imageLayout, +radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout, const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange *pRanges) { @@ -2290,20 +2160,18 @@ radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h, struct radv_meta_saved_state saved_state; /* Clear commands (except vkCmdClearAttachments) should not be affected by conditional rendering. */ - radv_meta_save( - &saved_state, cmd_buffer, - RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING); + radv_meta_save(&saved_state, cmd_buffer, + RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING); - radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pDepthStencil, - rangeCount, pRanges, false); + radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pDepthStencil, rangeCount, pRanges, + false); radv_meta_restore(&saved_state, cmd_buffer); } VKAPI_ATTR void VKAPI_CALL -radv_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, - const VkClearAttachment *pAttachments, uint32_t rectCount, - const VkClearRect *pRects) +radv_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment *pAttachments, + uint32_t rectCount, const VkClearRect *pRects) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_meta_saved_state saved_state; @@ -2313,8 +2181,7 @@ radv_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount if (!cmd_buffer->state.render.active) return; - radv_meta_save(&saved_state, cmd_buffer, - RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS); + radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS); /* FINISHME: We can do better than this dumb loop. It thrashes too much * state. 
diff --git a/src/amd/vulkan/meta/radv_meta_copy.c b/src/amd/vulkan/meta/radv_meta_copy.c index caad9b4..f4c3a29 100644 --- a/src/amd/vulkan/meta/radv_meta_copy.c +++ b/src/amd/vulkan/meta/radv_meta_copy.c @@ -47,8 +47,7 @@ vk_format_for_size(int bs) } static struct radv_meta_blit2d_surf -blit_surf_for_image_level_layer(struct radv_image *image, VkImageLayout layout, - const VkImageSubresourceLayers *subres, +blit_surf_for_image_level_layer(struct radv_image *image, VkImageLayout layout, const VkImageSubresourceLayers *subres, VkImageAspectFlags aspect_mask) { VkFormat format = radv_get_aspect_format(image, aspect_mask); @@ -72,15 +71,12 @@ blit_surf_for_image_level_layer(struct radv_image *image, VkImageLayout layout, bool radv_image_is_renderable(const struct radv_device *device, const struct radv_image *image) { - if (image->vk.format == VK_FORMAT_R32G32B32_UINT || - image->vk.format == VK_FORMAT_R32G32B32_SINT || + if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT || image->vk.format == VK_FORMAT_R32G32B32_SFLOAT) return false; - if (device->physical_device->rad_info.gfx_level >= GFX9 && - image->vk.image_type == VK_IMAGE_TYPE_3D && - vk_format_get_blocksizebits(image->vk.format) == 128 && - vk_format_is_compressed(image->vk.format)) + if (device->physical_device->rad_info.gfx_level >= GFX9 && image->vk.image_type == VK_IMAGE_TYPE_3D && + vk_format_get_blocksizebits(image->vk.format) == 128 && vk_format_is_compressed(image->vk.format)) return false; if (image->planes[0].surface.flags & RADEON_SURF_NO_RENDER_TARGET) @@ -90,9 +86,8 @@ radv_image_is_renderable(const struct radv_device *device, const struct radv_ima } static void -copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, - struct radv_image *image, VkImageLayout layout, - const VkBufferImageCopy2 *region) +copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, struct radv_image *image, + VkImageLayout layout, const VkBufferImageCopy2 *region) { struct radv_meta_saved_state saved_state; bool cs; @@ -102,16 +97,14 @@ copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buf */ assert(image->vk.samples == 1); - cs = cmd_buffer->qf == RADV_QUEUE_COMPUTE || - !radv_image_is_renderable(cmd_buffer->device, image); + cs = cmd_buffer->qf == RADV_QUEUE_COMPUTE || !radv_image_is_renderable(cmd_buffer->device, image); /* VK_EXT_conditional_rendering says that copy commands should not be * affected by conditional rendering. */ radv_meta_save(&saved_state, cmd_buffer, - (cs ? RADV_META_SAVE_COMPUTE_PIPELINE : RADV_META_SAVE_GRAPHICS_PIPELINE) | - RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS | - RADV_META_SUSPEND_PREDICATING); + (cs ? 
RADV_META_SAVE_COMPUTE_PIPELINE : RADV_META_SAVE_GRAPHICS_PIPELINE) | RADV_META_SAVE_CONSTANTS | + RADV_META_SAVE_DESCRIPTORS | RADV_META_SUSPEND_PREDICATING); /** * From the Vulkan 1.0.6 spec: 18.3 Copying Data Between Images @@ -133,15 +126,13 @@ copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buf }; /* Create blit surfaces */ - struct radv_meta_blit2d_surf img_bsurf = blit_surf_for_image_level_layer( - image, layout, &region->imageSubresource, region->imageSubresource.aspectMask); + struct radv_meta_blit2d_surf img_bsurf = + blit_surf_for_image_level_layer(image, layout, &region->imageSubresource, region->imageSubresource.aspectMask); if (!radv_is_buffer_format_supported(img_bsurf.format, NULL)) { - uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf, - cmd_buffer->qf); + uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf); bool compressed = - radv_layout_dcc_compressed(cmd_buffer->device, image, region->imageSubresource.mipLevel, - layout, queue_mask); + radv_layout_dcc_compressed(cmd_buffer->device, image, region->imageSubresource.mipLevel, layout, queue_mask); if (compressed) { radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_UNKNOWN_REASON); @@ -205,16 +196,14 @@ copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buf } VKAPI_ATTR void VKAPI_CALL -radv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, - const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo) +radv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer); RADV_FROM_HANDLE(radv_image, dst_image, pCopyBufferToImageInfo->dstImage); for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) { - copy_buffer_to_image(cmd_buffer, src_buffer, dst_image, - pCopyBufferToImageInfo->dstImageLayout, + copy_buffer_to_image(cmd_buffer, src_buffer, dst_image, pCopyBufferToImageInfo->dstImageLayout, &pCopyBufferToImageInfo->pRegions[r]); } @@ -223,8 +212,7 @@ radv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, dst_image) | - radv_dst_access_flush( - cmd_buffer, VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, dst_image); + radv_dst_access_flush(cmd_buffer, VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, dst_image); for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) { radv_meta_decode_etc(cmd_buffer, dst_image, pCopyBufferToImageInfo->dstImageLayout, &pCopyBufferToImageInfo->pRegions[r].imageSubresource, @@ -235,9 +223,8 @@ radv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, } static void -copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, - struct radv_image *image, VkImageLayout layout, - const VkBufferImageCopy2 *region) +copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, struct radv_image *image, + VkImageLayout layout, const VkBufferImageCopy2 *region) { struct radv_device *device = cmd_buffer->device; if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) { @@ -260,8 +247,8 @@ copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buf * affected by conditional rendering. 
*/ radv_meta_save(&saved_state, cmd_buffer, - RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | - RADV_META_SAVE_DESCRIPTORS | RADV_META_SUSPEND_PREDICATING); + RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS | + RADV_META_SUSPEND_PREDICATING); /** * From the Vulkan 1.0.6 spec: 18.3 Copying Data Between Images @@ -288,14 +275,13 @@ copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buf }; /* Create blit surfaces */ - struct radv_meta_blit2d_surf img_info = blit_surf_for_image_level_layer( - image, layout, &region->imageSubresource, region->imageSubresource.aspectMask); + struct radv_meta_blit2d_surf img_info = + blit_surf_for_image_level_layer(image, layout, &region->imageSubresource, region->imageSubresource.aspectMask); if (!radv_is_buffer_format_supported(img_info.format, NULL)) { - uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf, - cmd_buffer->qf); - bool compressed = radv_layout_dcc_compressed(device, image, region->imageSubresource.mipLevel, - layout, queue_mask); + uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf); + bool compressed = + radv_layout_dcc_compressed(device, image, region->imageSubresource.mipLevel, layout, queue_mask); if (compressed) { radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_UNKNOWN_REASON); @@ -349,24 +335,21 @@ copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buf } VKAPI_ATTR void VKAPI_CALL -radv_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, - const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo) +radv_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_image, src_image, pCopyImageToBufferInfo->srcImage); RADV_FROM_HANDLE(radv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer); for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) { - copy_image_to_buffer(cmd_buffer, dst_buffer, src_image, - pCopyImageToBufferInfo->srcImageLayout, + copy_image_to_buffer(cmd_buffer, dst_buffer, src_image, pCopyImageToBufferInfo->srcImageLayout, &pCopyImageToBufferInfo->pRegions[r]); } } static void -copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, - VkImageLayout src_image_layout, struct radv_image *dst_image, - VkImageLayout dst_image_layout, const VkImageCopy2 *region) +copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkImageLayout src_image_layout, + struct radv_image *dst_image, VkImageLayout dst_image_layout, const VkImageCopy2 *region) { struct radv_meta_saved_state saved_state; bool cs; @@ -378,29 +361,24 @@ copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, */ assert(src_image->vk.samples == dst_image->vk.samples); - cs = cmd_buffer->qf == RADV_QUEUE_COMPUTE || - !radv_image_is_renderable(cmd_buffer->device, dst_image); + cs = cmd_buffer->qf == RADV_QUEUE_COMPUTE || !radv_image_is_renderable(cmd_buffer->device, dst_image); /* VK_EXT_conditional_rendering says that copy commands should not be * affected by conditional rendering. */ radv_meta_save(&saved_state, cmd_buffer, - (cs ? RADV_META_SAVE_COMPUTE_PIPELINE : RADV_META_SAVE_GRAPHICS_PIPELINE) | - RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS | - RADV_META_SUSPEND_PREDICATING); + (cs ? 
RADV_META_SAVE_COMPUTE_PIPELINE : RADV_META_SAVE_GRAPHICS_PIPELINE) | RADV_META_SAVE_CONSTANTS | + RADV_META_SAVE_DESCRIPTORS | RADV_META_SUSPEND_PREDICATING); if (cs) { /* For partial copies, HTILE should be decompressed before copying because the metadata is * re-initialized to the uncompressed state after. */ - uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, - cmd_buffer->qf); + uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf); - if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, dst_image_layout, - queue_mask) && + if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, dst_image_layout, queue_mask) && (region->dstOffset.x || region->dstOffset.y || region->dstOffset.z || - region->extent.width != dst_image->vk.extent.width || - region->extent.height != dst_image->vk.extent.height || + region->extent.width != dst_image->vk.extent.width || region->extent.height != dst_image->vk.extent.height || region->extent.depth != dst_image->vk.extent.depth)) { radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_UNKNOWN_REASON); @@ -421,17 +399,13 @@ copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, } } - VkImageAspectFlags src_aspects[3] = { region->srcSubresource.aspectMask }; - VkImageAspectFlags dst_aspects[3] = { region->dstSubresource.aspectMask }; + VkImageAspectFlags src_aspects[3] = {region->srcSubresource.aspectMask}; + VkImageAspectFlags dst_aspects[3] = {region->dstSubresource.aspectMask}; unsigned aspect_count = 1; - if (region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT && - src_image->plane_count > 1) { - static const VkImageAspectFlags all_planes[3] = { - VK_IMAGE_ASPECT_PLANE_0_BIT, - VK_IMAGE_ASPECT_PLANE_1_BIT, - VK_IMAGE_ASPECT_PLANE_2_BIT - }; + if (region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT && src_image->plane_count > 1) { + static const VkImageAspectFlags all_planes[3] = {VK_IMAGE_ASPECT_PLANE_0_BIT, VK_IMAGE_ASPECT_PLANE_1_BIT, + VK_IMAGE_ASPECT_PLANE_2_BIT}; aspect_count = src_image->plane_count; for (unsigned i = 0; i < aspect_count; i++) { @@ -442,28 +416,23 @@ copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, for (unsigned a = 0; a < aspect_count; ++a) { /* Create blit surfaces */ - struct radv_meta_blit2d_surf b_src = blit_surf_for_image_level_layer( - src_image, src_image_layout, &region->srcSubresource, src_aspects[a]); + struct radv_meta_blit2d_surf b_src = + blit_surf_for_image_level_layer(src_image, src_image_layout, &region->srcSubresource, src_aspects[a]); - struct radv_meta_blit2d_surf b_dst = blit_surf_for_image_level_layer( - dst_image, dst_image_layout, &region->dstSubresource, dst_aspects[a]); + struct radv_meta_blit2d_surf b_dst = + blit_surf_for_image_level_layer(dst_image, dst_image_layout, &region->dstSubresource, dst_aspects[a]); - uint32_t dst_queue_mask = radv_image_queue_family_mask( - dst_image, cmd_buffer->qf, cmd_buffer->qf); - bool dst_compressed = radv_layout_dcc_compressed(cmd_buffer->device, dst_image, - region->dstSubresource.mipLevel, + uint32_t dst_queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf); + bool dst_compressed = radv_layout_dcc_compressed(cmd_buffer->device, dst_image, region->dstSubresource.mipLevel, dst_image_layout, dst_queue_mask); - uint32_t src_queue_mask = radv_image_queue_family_mask( - src_image, cmd_buffer->qf, cmd_buffer->qf); - bool src_compressed = radv_layout_dcc_compressed(cmd_buffer->device, src_image, -
region->srcSubresource.mipLevel, + uint32_t src_queue_mask = radv_image_queue_family_mask(src_image, cmd_buffer->qf, cmd_buffer->qf); + bool src_compressed = radv_layout_dcc_compressed(cmd_buffer->device, src_image, region->srcSubresource.mipLevel, src_image_layout, src_queue_mask); bool need_dcc_sign_reinterpret = false; - if (!src_compressed || - (radv_dcc_formats_compatible(cmd_buffer->device->physical_device->rad_info.gfx_level, - b_src.format, b_dst.format, &need_dcc_sign_reinterpret) && - !need_dcc_sign_reinterpret)) { + if (!src_compressed || (radv_dcc_formats_compatible(cmd_buffer->device->physical_device->rad_info.gfx_level, + b_src.format, b_dst.format, &need_dcc_sign_reinterpret) && + !need_dcc_sign_reinterpret)) { b_src.format = b_dst.format; } else if (!dst_compressed) { b_dst.format = b_src.format; @@ -493,10 +462,8 @@ copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, * Also, convert the offsets and extent from units of texels to units of * blocks - which is the highest resolution accessible in this command. */ - const VkOffset3D dst_offset_el = - vk_image_offset_to_elements(&dst_image->vk, region->dstOffset); - const VkOffset3D src_offset_el = - vk_image_offset_to_elements(&src_image->vk, region->srcOffset); + const VkOffset3D dst_offset_el = vk_image_offset_to_elements(&dst_image->vk, region->dstOffset); + const VkOffset3D src_offset_el = vk_image_offset_to_elements(&src_image->vk, region->srcOffset); /* * From Vulkan 1.0.68, "Copying Data Between Images": @@ -550,11 +517,9 @@ copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, if (cs) { /* Fixup HTILE after a copy on compute. */ - uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, - cmd_buffer->qf); + uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf); - if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, dst_image_layout, - queue_mask)) { + if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, dst_image_layout, queue_mask)) { cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE; VkImageSubresourceRange range = { @@ -582,8 +547,8 @@ radv_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyI RADV_FROM_HANDLE(radv_image, dst_image, pCopyImageInfo->dstImage); for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) { - copy_image(cmd_buffer, src_image, pCopyImageInfo->srcImageLayout, dst_image, - pCopyImageInfo->dstImageLayout, &pCopyImageInfo->pRegions[r]); + copy_image(cmd_buffer, src_image, pCopyImageInfo->srcImageLayout, dst_image, pCopyImageInfo->dstImageLayout, + &pCopyImageInfo->pRegions[r]); } if (cmd_buffer->device->physical_device->emulate_etc2 && @@ -591,12 +556,10 @@ radv_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyI cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, dst_image) | - radv_dst_access_flush( - cmd_buffer, VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, dst_image); + radv_dst_access_flush(cmd_buffer, VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, dst_image); for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) { radv_meta_decode_etc(cmd_buffer, dst_image, pCopyImageInfo->dstImageLayout, - &pCopyImageInfo->pRegions[r].dstSubresource, - pCopyImageInfo->pRegions[r].dstOffset, + &pCopyImageInfo->pRegions[r].dstSubresource, 
pCopyImageInfo->pRegions[r].dstOffset, pCopyImageInfo->pRegions[r].extent); } } diff --git a/src/amd/vulkan/meta/radv_meta_copy_vrs_htile.c b/src/amd/vulkan/meta/radv_meta_copy_vrs_htile.c index f8fa126..6af3920 100644 --- a/src/amd/vulkan/meta/radv_meta_copy_vrs_htile.c +++ b/src/amd/vulkan/meta/radv_meta_copy_vrs_htile.c @@ -33,12 +33,10 @@ radv_device_finish_meta_copy_vrs_htile_state(struct radv_device *device) { struct radv_meta_state *state = &device->meta_state; - radv_DestroyPipeline(radv_device_to_handle(device), state->copy_vrs_htile_pipeline, - &state->alloc); - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->copy_vrs_htile_p_layout, - &state->alloc); - device->vk.dispatch_table.DestroyDescriptorSetLayout( - radv_device_to_handle(device), state->copy_vrs_htile_ds_layout, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->copy_vrs_htile_pipeline, &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->copy_vrs_htile_p_layout, &state->alloc); + device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->copy_vrs_htile_ds_layout, + &state->alloc); } static nir_shader * @@ -64,15 +62,13 @@ build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf /* Get the HTILE addr from coordinates. */ nir_ssa_def *zero = nir_imm_int(&b, 0); - nir_ssa_def *htile_addr = ac_nir_htile_addr_from_coord( - &b, &device->physical_device->rad_info, &surf->u.gfx9.zs.htile_equation, htile_pitch, - htile_slice_size, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero); + nir_ssa_def *htile_addr = + ac_nir_htile_addr_from_coord(&b, &device->physical_device->rad_info, &surf->u.gfx9.zs.htile_equation, htile_pitch, + htile_slice_size, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero); /* Set up the input VRS image descriptor. 
*/ - const struct glsl_type *vrs_sampler_type = - glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT); - nir_variable *input_vrs_img = - nir_variable_create(b.shader, nir_var_uniform, vrs_sampler_type, "input_vrs_image"); + const struct glsl_type *vrs_sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT); + nir_variable *input_vrs_img = nir_variable_create(b.shader, nir_var_uniform, vrs_sampler_type, "input_vrs_image"); input_vrs_img->data.descriptor_set = 0; input_vrs_img->data.binding = 0; @@ -124,32 +120,30 @@ build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf } static VkResult -radv_device_init_meta_copy_vrs_htile_state(struct radv_device *device, - struct radeon_surf *surf) +radv_device_init_meta_copy_vrs_htile_state(struct radv_device *device, struct radeon_surf *surf) { struct radv_meta_state *state = &device->meta_state; nir_shader *cs = build_copy_vrs_htile_shader(device, surf); VkResult result; - VkDescriptorSetLayoutCreateInfo ds_layout_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 2, - .pBindings = (VkDescriptorSetLayoutBinding[]){ - {.binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - {.binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - }}; - - result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info, - &state->alloc, &state->copy_vrs_htile_ds_layout); + VkDescriptorSetLayoutCreateInfo ds_layout_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 2, + .pBindings = (VkDescriptorSetLayoutBinding[]){ + {.binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + {.binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + }}; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info, &state->alloc, + &state->copy_vrs_htile_ds_layout); if (result != VK_SUCCESS) goto fail; @@ -186,18 +180,16 @@ radv_device_init_meta_copy_vrs_htile_state(struct radv_device *device, .layout = state->copy_vrs_htile_p_layout, }; - result = - radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, &pipeline_info, - NULL, &state->copy_vrs_htile_pipeline); + result = radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, &pipeline_info, NULL, + &state->copy_vrs_htile_pipeline); fail: ralloc_free(cs); return result; } void -radv_copy_vrs_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *vrs_image, - const VkRect2D *rect, struct radv_image *dst_image, - struct radv_buffer *htile_buffer, bool read_htile_value) +radv_copy_vrs_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *vrs_image, const VkRect2D *rect, + struct radv_image *dst_image, struct radv_buffer *htile_buffer, bool read_htile_value) { struct radv_device *device = cmd_buffer->device; struct radv_meta_state *state = &device->meta_state; @@ -207,8 +199,7 @@ radv_copy_vrs_htile(struct 
radv_cmd_buffer *cmd_buffer, struct radv_image *vrs_i assert(radv_image_has_htile(dst_image)); if (!cmd_buffer->device->meta_state.copy_vrs_htile_pipeline) { - VkResult ret = radv_device_init_meta_copy_vrs_htile_state(cmd_buffer->device, - &dst_image->planes[0].surface); + VkResult ret = radv_device_init_meta_copy_vrs_htile_state(cmd_buffer->device, &dst_image->planes[0].surface); if (ret != VK_SUCCESS) { vk_command_buffer_set_error(&cmd_buffer->vk, ret); return; @@ -219,9 +210,8 @@ radv_copy_vrs_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *vrs_i radv_src_access_flush(cmd_buffer, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL) | radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT, NULL); - radv_meta_save( - &saved_state, cmd_buffer, - RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS); + radv_meta_save(&saved_state, cmd_buffer, + RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS); radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, state->copy_vrs_htile_pipeline); @@ -242,33 +232,34 @@ radv_copy_vrs_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *vrs_i radv_meta_push_descriptor_set( cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->copy_vrs_htile_p_layout, 0, /* set */ - 2, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]){ - {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = - (VkDescriptorImageInfo[]){ - { - .sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(&vrs_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - }}, - {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(htile_buffer), - .offset = 0, - .range = htile_buffer->vk.size}}}); + 2, /* descriptorWriteCount */ + (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = + (VkDescriptorImageInfo[]){ + { + .sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(&vrs_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + }}, + {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(htile_buffer), + .offset = 0, + .range = htile_buffer->vk.size}}}); const unsigned constants[5] = { - rect->offset.x, rect->offset.y, - dst_image->planes[0].surface.meta_pitch, dst_image->planes[0].surface.meta_slice_size, + rect->offset.x, + rect->offset.y, + dst_image->planes[0].surface.meta_pitch, + dst_image->planes[0].surface.meta_slice_size, read_htile_value, }; @@ -284,7 +275,6 @@ radv_copy_vrs_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *vrs_i radv_meta_restore(&saved_state, cmd_buffer); - cmd_buffer->state.flush_bits |= - RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | - radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL); + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | + 
radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL); } diff --git a/src/amd/vulkan/meta/radv_meta_dcc_retile.c b/src/amd/vulkan/meta/radv_meta_dcc_retile.c index a6e05a3..72987c9 100644 --- a/src/amd/vulkan/meta/radv_meta_dcc_retile.c +++ b/src/amd/vulkan/meta/radv_meta_dcc_retile.c @@ -56,26 +56,21 @@ build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *sur nir_ssa_def *coord = get_global_ids(&b, 2); nir_ssa_def *zero = nir_imm_int(&b, 0); - coord = nir_imul( - &b, coord, - nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width, surf->u.gfx9.color.dcc_block_height)); - - nir_ssa_def *src = ac_nir_dcc_addr_from_coord(&b, &dev->physical_device->rad_info, surf->bpe, - &surf->u.gfx9.color.dcc_equation, src_dcc_pitch, - src_dcc_height, zero, nir_channel(&b, coord, 0), - nir_channel(&b, coord, 1), zero, zero, zero); + coord = + nir_imul(&b, coord, nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width, surf->u.gfx9.color.dcc_block_height)); + + nir_ssa_def *src = ac_nir_dcc_addr_from_coord( + &b, &dev->physical_device->rad_info, surf->bpe, &surf->u.gfx9.color.dcc_equation, src_dcc_pitch, src_dcc_height, + zero, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero, zero); nir_ssa_def *dst = ac_nir_dcc_addr_from_coord( - &b, &dev->physical_device->rad_info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation, - dst_dcc_pitch, dst_dcc_height, zero, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), - zero, zero, zero); + &b, &dev->physical_device->rad_info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation, dst_dcc_pitch, + dst_dcc_height, zero, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero, zero); - nir_ssa_def *dcc_val = nir_image_deref_load(&b, 1, 32, input_dcc_ref, - nir_vec4(&b, src, src, src, src), - nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0), - .image_dim = dim); + nir_ssa_def *dcc_val = nir_image_deref_load(&b, 1, 32, input_dcc_ref, nir_vec4(&b, src, src, src, src), + nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = dim); - nir_image_deref_store(&b, output_dcc_ref, nir_vec4(&b, dst, dst, dst, dst), - nir_ssa_undef(&b, 1, 32), dcc_val, nir_imm_int(&b, 0), .image_dim = dim); + nir_image_deref_store(&b, output_dcc_ref, nir_vec4(&b, dst, dst, dst, dst), nir_ssa_undef(&b, 1, 32), dcc_val, + nir_imm_int(&b, 0), .image_dim = dim); return b.shader; } @@ -86,13 +81,11 @@ radv_device_finish_meta_dcc_retile_state(struct radv_device *device) struct radv_meta_state *state = &device->meta_state; for (unsigned i = 0; i < ARRAY_SIZE(state->dcc_retile.pipeline); i++) { - radv_DestroyPipeline(radv_device_to_handle(device), state->dcc_retile.pipeline[i], - &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->dcc_retile.pipeline[i], &state->alloc); } - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->dcc_retile.p_layout, - &state->alloc); - device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), - state->dcc_retile.ds_layout, &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->dcc_retile.p_layout, &state->alloc); + device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->dcc_retile.ds_layout, + &state->alloc); /* Reset for next finish. 
*/ memset(&state->dcc_retile, 0, sizeof(state->dcc_retile)); @@ -112,25 +105,23 @@ radv_device_init_meta_dcc_retile_state(struct radv_device *device, struct radeon VkResult result = VK_SUCCESS; nir_shader *cs = build_dcc_retile_compute_shader(device, surf); - VkDescriptorSetLayoutCreateInfo ds_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 2, - .pBindings = (VkDescriptorSetLayoutBinding[]){ - {.binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - {.binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - }}; - - result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, - &device->meta_state.alloc, + VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 2, + .pBindings = (VkDescriptorSetLayoutBinding[]){ + {.binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + {.binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + }}; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc, &device->meta_state.dcc_retile.ds_layout); if (result != VK_SUCCESS) goto cleanup; @@ -143,9 +134,8 @@ radv_device_init_meta_dcc_retile_state(struct radv_device *device, struct radeon .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16}, }; - result = - radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, - &device->meta_state.alloc, &device->meta_state.dcc_retile.p_layout); + result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, + &device->meta_state.dcc_retile.p_layout); if (result != VK_SUCCESS) goto cleanup; @@ -166,9 +156,8 @@ radv_device_init_meta_dcc_retile_state(struct radv_device *device, struct radeon .layout = device->meta_state.dcc_retile.p_layout, }; - result = radv_compute_pipeline_create( - radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, NULL, - &device->meta_state.dcc_retile.pipeline[surf->u.gfx9.swizzle_mode]); + result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, + NULL, &device->meta_state.dcc_retile.pipeline[surf->u.gfx9.swizzle_mode]); if (result != VK_SUCCESS) goto cleanup; @@ -196,17 +185,15 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image) /* Compile pipelines if not already done so. 
*/ if (!cmd_buffer->device->meta_state.dcc_retile.pipeline[swizzle_mode]) { - VkResult ret = - radv_device_init_meta_dcc_retile_state(cmd_buffer->device, &image->planes[0].surface); + VkResult ret = radv_device_init_meta_dcc_retile_state(cmd_buffer->device, &image->planes[0].surface); if (ret != VK_SUCCESS) { vk_command_buffer_set_error(&cmd_buffer->vk, ret); return; } } - radv_meta_save( - &saved_state, cmd_buffer, - RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS); + radv_meta_save(&saved_state, cmd_buffer, + RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS); radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.dcc_retile.pipeline[swizzle_mode]); @@ -234,8 +221,8 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image) for (unsigned i = 0; i < 2; ++i) view_handles[i] = radv_buffer_view_to_handle(&views[i]); - radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - device->meta_state.dcc_retile.p_layout, 0, /* set */ + radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.dcc_retile.p_layout, + 0, /* set */ 2, /* descriptorWriteCount */ (VkWriteDescriptorSet[]){ { @@ -260,8 +247,7 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image) unsigned height = DIV_ROUND_UP(image->vk.extent.height, vk_format_get_blockheight(image->vk.format)); unsigned dcc_width = DIV_ROUND_UP(width, image->planes[0].surface.u.gfx9.color.dcc_block_width); - unsigned dcc_height = - DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height); + unsigned dcc_height = DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height); uint32_t constants[] = { image->planes[0].surface.u.gfx9.color.dcc_pitch_max + 1, @@ -269,9 +255,8 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image) image->planes[0].surface.u.gfx9.color.display_dcc_pitch_max + 1, image->planes[0].surface.u.gfx9.color.display_dcc_height, }; - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.dcc_retile.p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, - constants); + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.dcc_retile.p_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, constants); radv_unaligned_dispatch(cmd_buffer, dcc_width, dcc_height, 1); @@ -281,6 +266,6 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image) radv_meta_restore(&saved_state, cmd_buffer); - state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | - radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); + state->flush_bits |= + RADV_CMD_FLAG_CS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); } diff --git a/src/amd/vulkan/meta/radv_meta_decompress.c b/src/amd/vulkan/meta/radv_meta_decompress.c index f985fea..ffb12df 100644 --- a/src/amd/vulkan/meta/radv_meta_decompress.c +++ b/src/amd/vulkan/meta/radv_meta_decompress.c @@ -53,15 +53,14 @@ build_expand_depth_stencil_compute_shader(struct radv_device *dev) nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32); - nir_ssa_def *block_size = - nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1], - b.shader->info.workgroup_size[2], 0); + nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], 
b.shader->info.workgroup_size[1], + b.shader->info.workgroup_size[2], 0); nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); - nir_ssa_def *data = nir_image_deref_load( - &b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, global_id, nir_ssa_undef(&b, 1, 32), - nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D); + nir_ssa_def *data = + nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, global_id, + nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D); /* We need a NIR_SCOPE_DEVICE memory_scope because ACO will avoid * creating a vmcnt(0) because it expects the L1 cache to keep memory @@ -70,9 +69,8 @@ build_expand_depth_stencil_compute_shader(struct radv_device *dev) nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP, .memory_scope = NIR_SCOPE_DEVICE, .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo); - nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id, - nir_ssa_undef(&b, 1, 32), data, nir_imm_int(&b, 0), - .image_dim = GLSL_SAMPLER_DIM_2D); + nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id, nir_ssa_undef(&b, 1, 32), data, + nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D); return b.shader; } @@ -82,26 +80,24 @@ create_expand_depth_stencil_compute(struct radv_device *device) VkResult result = VK_SUCCESS; nir_shader *cs = build_expand_depth_stencil_compute_shader(device); - VkDescriptorSetLayoutCreateInfo ds_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 2, - .pBindings = (VkDescriptorSetLayoutBinding[]){ - {.binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - {.binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - }}; - - result = radv_CreateDescriptorSetLayout( - radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc, - &device->meta_state.expand_depth_stencil_compute_ds_layout); + VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 2, + .pBindings = (VkDescriptorSetLayoutBinding[]){ + {.binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + {.binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + }}; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc, + &device->meta_state.expand_depth_stencil_compute_ds_layout); if (result != VK_SUCCESS) goto cleanup; @@ -113,9 +109,8 @@ create_expand_depth_stencil_compute(struct radv_device *device) .pPushConstantRanges = NULL, }; - result = radv_CreatePipelineLayout( - radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, - &device->meta_state.expand_depth_stencil_compute_p_layout); + result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, + 
&device->meta_state.expand_depth_stencil_compute_p_layout); if (result != VK_SUCCESS) goto cleanup; @@ -136,10 +131,8 @@ create_expand_depth_stencil_compute(struct radv_device *device) .layout = device->meta_state.expand_depth_stencil_compute_p_layout, }; - result = radv_CreateComputePipelines( - radv_device_to_handle(device), device->meta_state.cache, 1, - &vk_pipeline_info, NULL, - &device->meta_state.expand_depth_stencil_compute_pipeline); + result = radv_CreateComputePipelines(radv_device_to_handle(device), device->meta_state.cache, 1, &vk_pipeline_info, + NULL, &device->meta_state.expand_depth_stencil_compute_pipeline); if (result != VK_SUCCESS) goto cleanup; @@ -159,13 +152,12 @@ create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout) .pPushConstantRanges = NULL, }; - return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, - &device->meta_state.alloc, layout); + return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, layout); } static VkResult -create_pipeline(struct radv_device *device, uint32_t samples, VkPipelineLayout layout, - enum radv_depth_op op, VkPipeline *pipeline) +create_pipeline(struct radv_device *device, uint32_t samples, VkPipelineLayout layout, enum radv_depth_op op, + VkPipeline *pipeline) { VkResult result; VkDevice device_h = radv_device_to_handle(device); @@ -290,8 +282,8 @@ create_pipeline(struct radv_device *device, uint32_t samples, VkPipelineLayout l .resummarize_enable = op == DEPTH_RESUMMARIZE, }; - result = radv_graphics_pipeline_create(device_h, device->meta_state.cache, &pipeline_create_info, - &extra, &device->meta_state.alloc, pipeline); + result = radv_graphics_pipeline_create(device_h, device->meta_state.cache, &pipeline_create_info, &extra, + &device->meta_state.alloc, pipeline); cleanup: ralloc_free(fs_module); @@ -306,21 +298,17 @@ radv_device_finish_meta_depth_decomp_state(struct radv_device *device) struct radv_meta_state *state = &device->meta_state; for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) { - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->depth_decomp[i].p_layout, - &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->depth_decomp[i].p_layout, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->depth_decomp[i].decompress_pipeline, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->depth_decomp[i].resummarize_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->depth_decomp[i].decompress_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->depth_decomp[i].resummarize_pipeline, &state->alloc); } - radv_DestroyPipeline(radv_device_to_handle(device), - state->expand_depth_stencil_compute_pipeline, &state->alloc); - radv_DestroyPipelineLayout(radv_device_to_handle(device), - state->expand_depth_stencil_compute_p_layout, &state->alloc); - device->vk.dispatch_table.DestroyDescriptorSetLayout( - radv_device_to_handle(device), state->expand_depth_stencil_compute_ds_layout, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->expand_depth_stencil_compute_pipeline, &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->expand_depth_stencil_compute_p_layout, + &state->alloc); + device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), + state->expand_depth_stencil_compute_ds_layout, 
&state->alloc); } VkResult @@ -365,15 +353,15 @@ radv_get_depth_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image *i if (!state->depth_decomp[samples_log2].decompress_pipeline) { VkResult ret; - ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].p_layout, - DEPTH_DECOMPRESS, &state->depth_decomp[samples_log2].decompress_pipeline); + ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].p_layout, DEPTH_DECOMPRESS, + &state->depth_decomp[samples_log2].decompress_pipeline); if (ret != VK_SUCCESS) { vk_command_buffer_set_error(&cmd_buffer->vk, ret); return NULL; } - ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].p_layout, - DEPTH_RESUMMARIZE, &state->depth_decomp[samples_log2].resummarize_pipeline); + ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].p_layout, DEPTH_RESUMMARIZE, + &state->depth_decomp[samples_log2].resummarize_pipeline); if (ret != VK_SUCCESS) { vk_command_buffer_set_error(&cmd_buffer->vk, ret); return NULL; @@ -440,10 +428,7 @@ radv_process_depth_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_i const VkRenderingInfo rendering_info = { .sType = VK_STRUCTURE_TYPE_RENDERING_INFO, - .renderArea = { - .offset = { 0, 0 }, - .extent = { width, height } - }, + .renderArea = {.offset = {0, 0}, .extent = {width, height}}, .layerCount = 1, .pDepthAttachment = &depth_att, .pStencilAttachment = &stencil_att, @@ -467,14 +452,11 @@ radv_process_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); VkPipeline *pipeline; - radv_meta_save( - &saved_state, cmd_buffer, - RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_RENDER); + radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_RENDER); pipeline = radv_get_depth_pipeline(cmd_buffer, image, subresourceRange, op); - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, - *pipeline); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); if (sample_locs) { assert(image->vk.create_flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT); @@ -483,13 +465,12 @@ radv_process_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image * automatic layout transitions, otherwise the depth decompress * pass uses the default HW locations. 
*/ - radv_CmdSetSampleLocationsEXT(cmd_buffer_h, - &(VkSampleLocationsInfoEXT){ - .sampleLocationsPerPixel = sample_locs->per_pixel, - .sampleLocationGridSize = sample_locs->grid_size, - .sampleLocationsCount = sample_locs->count, - .pSampleLocations = sample_locs->locations, - }); + radv_CmdSetSampleLocationsEXT(cmd_buffer_h, &(VkSampleLocationsInfoEXT){ + .sampleLocationsPerPixel = sample_locs->per_pixel, + .sampleLocationGridSize = sample_locs->grid_size, + .sampleLocationsCount = sample_locs->count, + .pSampleLocations = sample_locs->locations, + }); } for (uint32_t l = 0; l < vk_image_subresource_level_count(&image->vk, subresourceRange); ++l) { @@ -501,13 +482,9 @@ radv_process_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image uint32_t width = radv_minify(image->vk.extent.width, subresourceRange->baseMipLevel + l); uint32_t height = radv_minify(image->vk.extent.height, subresourceRange->baseMipLevel + l); - radv_CmdSetViewport(cmd_buffer_h, 0, 1, - &(VkViewport){.x = 0, - .y = 0, - .width = width, - .height = height, - .minDepth = 0.0f, - .maxDepth = 1.0f}); + radv_CmdSetViewport( + cmd_buffer_h, 0, 1, + &(VkViewport){.x = 0, .y = 0, .width = width, .height = height, .minDepth = 0.0f, .maxDepth = 1.0f}); radv_CmdSetScissor(cmd_buffer_h, 0, 1, &(VkRect2D){ @@ -534,11 +511,9 @@ radv_expand_depth_stencil_compute(struct radv_cmd_buffer *cmd_buffer, struct rad assert(radv_image_is_tc_compat_htile(image)); - cmd_buffer->state.flush_bits |= - radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); + cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); - radv_meta_save(&saved_state, cmd_buffer, - RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE); + radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE); radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.expand_depth_stencil_compute_pipeline); @@ -554,38 +529,36 @@ radv_expand_depth_stencil_compute(struct radv_cmd_buffer *cmd_buffer, struct rad height = radv_minify(image->vk.extent.height, subresourceRange->baseMipLevel + l); for (uint32_t s = 0; s < vk_image_subresource_layer_count(&image->vk, subresourceRange); s++) { - radv_image_view_init( - &load_iview, cmd_buffer->device, - &(VkImageViewCreateInfo){ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = radv_image_to_handle(image), - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = image->vk.format, - .subresourceRange = {.aspectMask = subresourceRange->aspectMask, - .baseMipLevel = subresourceRange->baseMipLevel + l, - .levelCount = 1, - .baseArrayLayer = subresourceRange->baseArrayLayer + s, - .layerCount = 1}, - }, - 0, &(struct radv_image_view_extra_create_info){.enable_compression = true}); - radv_image_view_init( - &store_iview, cmd_buffer->device, - &(VkImageViewCreateInfo){ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = radv_image_to_handle(image), - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = image->vk.format, - .subresourceRange = {.aspectMask = subresourceRange->aspectMask, - .baseMipLevel = subresourceRange->baseMipLevel + l, - .levelCount = 1, - .baseArrayLayer = subresourceRange->baseArrayLayer + s, - .layerCount = 1}, - }, - 0, &(struct radv_image_view_extra_create_info){.disable_compression = true}); + radv_image_view_init(&load_iview, cmd_buffer->device, + &(VkImageViewCreateInfo){ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image 
= radv_image_to_handle(image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = image->vk.format, + .subresourceRange = {.aspectMask = subresourceRange->aspectMask, + .baseMipLevel = subresourceRange->baseMipLevel + l, + .levelCount = 1, + .baseArrayLayer = subresourceRange->baseArrayLayer + s, + .layerCount = 1}, + }, + 0, &(struct radv_image_view_extra_create_info){.enable_compression = true}); + radv_image_view_init(&store_iview, cmd_buffer->device, + &(VkImageViewCreateInfo){ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = radv_image_to_handle(image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = image->vk.format, + .subresourceRange = {.aspectMask = subresourceRange->aspectMask, + .baseMipLevel = subresourceRange->baseMipLevel + l, + .levelCount = 1, + .baseArrayLayer = subresourceRange->baseArrayLayer + s, + .layerCount = 1}, + }, + 0, &(struct radv_image_view_extra_create_info){.disable_compression = true}); radv_meta_push_descriptor_set( - cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - device->meta_state.expand_depth_stencil_compute_p_layout, 0, /* set */ + cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.expand_depth_stencil_compute_p_layout, + 0, /* set */ 2, /* descriptorWriteCount */ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .dstBinding = 0, @@ -622,9 +595,8 @@ radv_expand_depth_stencil_compute(struct radv_cmd_buffer *cmd_buffer, struct rad radv_meta_restore(&saved_state, cmd_buffer); - cmd_buffer->state.flush_bits |= - RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | - radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | + radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); /* Initialize the HTILE metadata as "fully expanded". */ uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, image); diff --git a/src/amd/vulkan/meta/radv_meta_etc_decode.c b/src/amd/vulkan/meta/radv_meta_etc_decode.c index 707ba93..c4d1ec4 100644 --- a/src/amd/vulkan/meta/radv_meta_etc_decode.c +++ b/src/amd/vulkan/meta/radv_meta_etc_decode.c @@ -50,9 +50,8 @@ flip_endian(nir_builder *b, nir_ssa_def *src, unsigned cnt) nir_ssa_def *chan = cnt == 1 ? src : nir_channel(b, src, i); for (unsigned j = 0; j < 4; ++j) intermediate[j] = nir_ubfe_imm(b, chan, 8 * j, 8); - v[i] = nir_ior( - b, nir_ior(b, nir_ishl_imm(b, intermediate[0], 24), nir_ishl_imm(b, intermediate[1], 16)), - nir_ior(b, nir_ishl_imm(b, intermediate[2], 8), nir_ishl_imm(b, intermediate[3], 0))); + v[i] = nir_ior(b, nir_ior(b, nir_ishl_imm(b, intermediate[0], 24), nir_ishl_imm(b, intermediate[1], 16)), + nir_ior(b, nir_ishl_imm(b, intermediate[2], 8), nir_ishl_imm(b, intermediate[3], 0))); } return cnt == 1 ? 
v[0] : nir_vec(b, v, cnt); } @@ -60,13 +59,11 @@ flip_endian(nir_builder *b, nir_ssa_def *src, unsigned cnt) static nir_ssa_def * etc1_color_modifier_lookup(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) { - const unsigned table[8][2] = {{2, 8}, {5, 17}, {9, 29}, {13, 42}, - {18, 60}, {24, 80}, {33, 106}, {47, 183}}; + const unsigned table[8][2] = {{2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183}}; nir_ssa_def *upper = nir_ieq_imm(b, y, 1); nir_ssa_def *result = NULL; for (unsigned i = 0; i < 8; ++i) { - nir_ssa_def *tmp = - nir_bcsel(b, upper, nir_imm_int(b, table[i][1]), nir_imm_int(b, table[i][0])); + nir_ssa_def *tmp = nir_bcsel(b, upper, nir_imm_int(b, table[i][1]), nir_imm_int(b, table[i][0])); if (result) result = nir_bcsel(b, nir_ieq_imm(b, x, i), tmp, result); else @@ -114,8 +111,8 @@ etc_extend(nir_builder *b, nir_ssa_def *v, int bits) } static nir_ssa_def * -decode_etc2_alpha(struct nir_builder *b, nir_ssa_def *alpha_payload, nir_ssa_def *linear_pixel, - bool eac, nir_ssa_def *is_signed) +decode_etc2_alpha(struct nir_builder *b, nir_ssa_def *alpha_payload, nir_ssa_def *linear_pixel, bool eac, + nir_ssa_def *is_signed) { alpha_payload = flip_endian(b, alpha_payload, 2); nir_ssa_def *alpha_x = nir_channel(b, alpha_payload, 1); @@ -133,15 +130,12 @@ decode_etc2_alpha(struct nir_builder *b, nir_ssa_def *alpha_payload, nir_ssa_def multiplier = nir_imax(b, nir_imul_imm(b, multiplier, 8), nir_imm_int(b, 1)); } - nir_ssa_def *lsb_index = - nir_ubfe(b, nir_bcsel(b, nir_uge_imm(b, bit_offset, 32), alpha_y, alpha_x), - nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 2)); + nir_ssa_def *lsb_index = nir_ubfe(b, nir_bcsel(b, nir_uge_imm(b, bit_offset, 32), alpha_y, alpha_x), + nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 2)); bit_offset = nir_iadd_imm(b, bit_offset, 2); - nir_ssa_def *msb = - nir_ubfe(b, nir_bcsel(b, nir_uge_imm(b, bit_offset, 32), alpha_y, alpha_x), - nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 1)); - nir_ssa_def *mod = - nir_ixor(b, etc1_alpha_modifier_lookup(b, table, lsb_index), nir_iadd_imm(b, msb, -1)); + nir_ssa_def *msb = nir_ubfe(b, nir_bcsel(b, nir_uge_imm(b, bit_offset, 32), alpha_y, alpha_x), + nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 1)); + nir_ssa_def *mod = nir_ixor(b, etc1_alpha_modifier_lookup(b, table, lsb_index), nir_iadd_imm(b, msb, -1)); nir_ssa_def *a = nir_iadd(b, base, nir_imul(b, mod, multiplier)); nir_ssa_def *low_bound = nir_imm_int(b, 0); @@ -150,8 +144,7 @@ decode_etc2_alpha(struct nir_builder *b, nir_ssa_def *alpha_payload, nir_ssa_def if (eac) { low_bound = nir_bcsel(b, is_signed, nir_imm_int(b, -1023), low_bound); high_bound = nir_bcsel(b, is_signed, nir_imm_int(b, 1023), nir_imm_int(b, 2047)); - final_mult = - nir_bcsel(b, is_signed, nir_imm_float(b, 1 / 1023.0), nir_imm_float(b, 1 / 2047.0)); + final_mult = nir_bcsel(b, is_signed, nir_imm_float(b, 1 / 1023.0), nir_imm_float(b, 1 / 2047.0)); } return nir_fmul(b, nir_i2f32(b, nir_iclamp(b, a, low_bound, high_bound)), final_mult); @@ -160,83 +153,67 @@ decode_etc2_alpha(struct nir_builder *b, nir_ssa_def *alpha_payload, nir_ssa_def static nir_shader * build_shader(struct radv_device *dev) { - const struct glsl_type *sampler_type_2d = - glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, true, GLSL_TYPE_UINT); - const struct glsl_type *sampler_type_3d = - glsl_sampler_type(GLSL_SAMPLER_DIM_3D, false, false, GLSL_TYPE_UINT); - const struct glsl_type *img_type_2d = - glsl_image_type(GLSL_SAMPLER_DIM_2D, true, GLSL_TYPE_FLOAT); - const struct glsl_type 
*img_type_3d = - glsl_image_type(GLSL_SAMPLER_DIM_3D, false, GLSL_TYPE_FLOAT); + const struct glsl_type *sampler_type_2d = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, true, GLSL_TYPE_UINT); + const struct glsl_type *sampler_type_3d = glsl_sampler_type(GLSL_SAMPLER_DIM_3D, false, false, GLSL_TYPE_UINT); + const struct glsl_type *img_type_2d = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, GLSL_TYPE_FLOAT); + const struct glsl_type *img_type_3d = glsl_image_type(GLSL_SAMPLER_DIM_3D, false, GLSL_TYPE_FLOAT); nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_decode_etc"); b.shader->info.workgroup_size[0] = 8; b.shader->info.workgroup_size[1] = 8; - nir_variable *input_img_2d = - nir_variable_create(b.shader, nir_var_uniform, sampler_type_2d, "s_tex_2d"); + nir_variable *input_img_2d = nir_variable_create(b.shader, nir_var_uniform, sampler_type_2d, "s_tex_2d"); input_img_2d->data.descriptor_set = 0; input_img_2d->data.binding = 0; - nir_variable *input_img_3d = - nir_variable_create(b.shader, nir_var_uniform, sampler_type_3d, "s_tex_3d"); + nir_variable *input_img_3d = nir_variable_create(b.shader, nir_var_uniform, sampler_type_3d, "s_tex_3d"); input_img_2d->data.descriptor_set = 0; input_img_2d->data.binding = 0; - nir_variable *output_img_2d = - nir_variable_create(b.shader, nir_var_image, img_type_2d, "out_img_2d"); + nir_variable *output_img_2d = nir_variable_create(b.shader, nir_var_image, img_type_2d, "out_img_2d"); output_img_2d->data.descriptor_set = 0; output_img_2d->data.binding = 1; - nir_variable *output_img_3d = - nir_variable_create(b.shader, nir_var_image, img_type_3d, "out_img_3d"); + nir_variable *output_img_3d = nir_variable_create(b.shader, nir_var_image, img_type_3d, "out_img_3d"); output_img_3d->data.descriptor_set = 0; output_img_3d->data.binding = 1; nir_ssa_def *global_id = get_global_ids(&b, 3); nir_ssa_def *consts = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16); - nir_ssa_def *consts2 = - nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4); + nir_ssa_def *consts2 = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4); nir_ssa_def *offset = nir_channels(&b, consts, 7); nir_ssa_def *format = nir_channel(&b, consts, 3); nir_ssa_def *image_type = nir_channel(&b, consts2, 0); nir_ssa_def *is_3d = nir_ieq_imm(&b, image_type, VK_IMAGE_TYPE_3D); nir_ssa_def *coord = nir_iadd(&b, global_id, offset); - nir_ssa_def *src_coord = - nir_vec3(&b, nir_ushr_imm(&b, nir_channel(&b, coord, 0), 2), - nir_ushr_imm(&b, nir_channel(&b, coord, 1), 2), nir_channel(&b, coord, 2)); + nir_ssa_def *src_coord = nir_vec3(&b, nir_ushr_imm(&b, nir_channel(&b, coord, 0), 2), + nir_ushr_imm(&b, nir_channel(&b, coord, 1), 2), nir_channel(&b, coord, 2)); - nir_variable *payload_var = - nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "payload"); + nir_variable *payload_var = nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "payload"); nir_push_if(&b, is_3d); { - nir_ssa_def *color = nir_txf_deref(&b, nir_build_deref_var(&b, input_img_3d), src_coord, - nir_imm_int(&b, 0)); + nir_ssa_def *color = nir_txf_deref(&b, nir_build_deref_var(&b, input_img_3d), src_coord, nir_imm_int(&b, 0)); nir_store_var(&b, payload_var, color, 0xf); } nir_push_else(&b, NULL); { - nir_ssa_def *color = nir_txf_deref(&b, nir_build_deref_var(&b, input_img_2d), src_coord, - nir_imm_int(&b, 0)); + nir_ssa_def *color = nir_txf_deref(&b, nir_build_deref_var(&b, input_img_2d), src_coord, nir_imm_int(&b, 0)); 
nir_store_var(&b, payload_var, color, 0xf); } nir_pop_if(&b, NULL); nir_ssa_def *pixel_coord = nir_iand_imm(&b, nir_channels(&b, coord, 3), 3); - nir_ssa_def *linear_pixel = nir_iadd(&b, nir_imul_imm(&b, nir_channel(&b, pixel_coord, 0), 4), - nir_channel(&b, pixel_coord, 1)); + nir_ssa_def *linear_pixel = + nir_iadd(&b, nir_imul_imm(&b, nir_channel(&b, pixel_coord, 0), 4), nir_channel(&b, pixel_coord, 1)); nir_ssa_def *payload = nir_load_var(&b, payload_var); - nir_variable *color = - nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "color"); + nir_variable *color = nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "color"); nir_store_var(&b, color, nir_imm_vec4(&b, 1.0, 0.0, 0.0, 1.0), 0xf); nir_push_if(&b, nir_ilt_imm(&b, format, VK_FORMAT_EAC_R11_UNORM_BLOCK)); { - nir_ssa_def *alpha_bits_8 = - nir_ige_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK); - nir_ssa_def *alpha_bits_1 = - nir_iand(&b, nir_ige_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK), - nir_ilt_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK)); + nir_ssa_def *alpha_bits_8 = nir_ige_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK); + nir_ssa_def *alpha_bits_1 = nir_iand(&b, nir_ige_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK), + nir_ilt_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK)); nir_ssa_def *color_payload = nir_bcsel(&b, alpha_bits_8, nir_channels(&b, payload, 0xC), nir_channels(&b, payload, 3)); @@ -244,27 +221,22 @@ build_shader(struct radv_device *dev) nir_ssa_def *color_y = nir_channel(&b, color_payload, 0); nir_ssa_def *color_x = nir_channel(&b, color_payload, 1); nir_ssa_def *flip = nir_test_mask(&b, color_y, 1); - nir_ssa_def *subblock = nir_ushr_imm( - &b, nir_bcsel(&b, flip, nir_channel(&b, pixel_coord, 1), nir_channel(&b, pixel_coord, 0)), - 1); - - nir_variable *punchthrough = - nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "punchthrough"); - nir_ssa_def *punchthrough_init = - nir_iand(&b, alpha_bits_1, nir_inot(&b, nir_test_mask(&b, color_y, 2))); + nir_ssa_def *subblock = + nir_ushr_imm(&b, nir_bcsel(&b, flip, nir_channel(&b, pixel_coord, 1), nir_channel(&b, pixel_coord, 0)), 1); + + nir_variable *punchthrough = nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "punchthrough"); + nir_ssa_def *punchthrough_init = nir_iand(&b, alpha_bits_1, nir_inot(&b, nir_test_mask(&b, color_y, 2))); nir_store_var(&b, punchthrough, punchthrough_init, 0x1); - nir_variable *etc1_compat = - nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "etc1_compat"); + nir_variable *etc1_compat = nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "etc1_compat"); nir_store_var(&b, etc1_compat, nir_imm_false(&b), 0x1); nir_variable *alpha_result = nir_variable_create(b.shader, nir_var_shader_temp, glsl_float_type(), "alpha_result"); nir_push_if(&b, alpha_bits_8); { - nir_store_var( - &b, alpha_result, - decode_etc2_alpha(&b, nir_channels(&b, payload, 3), linear_pixel, false, NULL), 1); + nir_store_var(&b, alpha_result, decode_etc2_alpha(&b, nir_channels(&b, payload, 3), linear_pixel, false, NULL), + 1); } nir_push_else(&b, NULL); { @@ -273,27 +245,21 @@ build_shader(struct radv_device *dev) nir_pop_if(&b, NULL); const struct glsl_type *uvec3_type = glsl_vector_type(GLSL_TYPE_UINT, 3); - nir_variable *rgb_result = - nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, "rgb_result"); - nir_variable *base_rgb = - nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, 
"base_rgb"); + nir_variable *rgb_result = nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, "rgb_result"); + nir_variable *base_rgb = nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, "base_rgb"); nir_store_var(&b, rgb_result, nir_imm_ivec3(&b, 255, 0, 0), 0x7); - nir_ssa_def *msb = - nir_iand_imm(&b, nir_ushr(&b, color_x, nir_iadd_imm(&b, linear_pixel, 15)), 2); + nir_ssa_def *msb = nir_iand_imm(&b, nir_ushr(&b, color_x, nir_iadd_imm(&b, linear_pixel, 15)), 2); nir_ssa_def *lsb = nir_iand_imm(&b, nir_ushr(&b, color_x, linear_pixel), 1); - nir_push_if( - &b, nir_iand(&b, nir_inot(&b, alpha_bits_1), nir_inot(&b, nir_test_mask(&b, color_y, 2)))); + nir_push_if(&b, nir_iand(&b, nir_inot(&b, alpha_bits_1), nir_inot(&b, nir_test_mask(&b, color_y, 2)))); { nir_store_var(&b, etc1_compat, nir_imm_true(&b), 1); nir_ssa_def *tmp[3]; for (unsigned i = 0; i < 3; ++i) tmp[i] = etc_extend( &b, - nir_iand_imm(&b, - nir_ushr(&b, color_y, - nir_isub_imm(&b, 28 - 8 * i, nir_imul_imm(&b, subblock, 4))), + nir_iand_imm(&b, nir_ushr(&b, color_y, nir_isub_imm(&b, 28 - 8 * i, nir_imul_imm(&b, subblock, 4))), 0xf), 4); nir_store_var(&b, base_rgb, nir_vec(&b, tmp, 3), 0x7); @@ -312,21 +278,20 @@ build_shader(struct radv_device *dev) nir_push_if(&b, nir_ugt_imm(&b, r1, 31)); { - nir_ssa_def *r0 = nir_ior(&b, nir_ubfe_imm(&b, color_y, 24, 2), - nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 27, 2), 2)); + nir_ssa_def *r0 = + nir_ior(&b, nir_ubfe_imm(&b, color_y, 24, 2), nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 27, 2), 2)); nir_ssa_def *g0 = nir_ubfe_imm(&b, color_y, 20, 4); nir_ssa_def *b0 = nir_ubfe_imm(&b, color_y, 16, 4); nir_ssa_def *r2 = nir_ubfe_imm(&b, color_y, 12, 4); nir_ssa_def *g2 = nir_ubfe_imm(&b, color_y, 8, 4); nir_ssa_def *b2 = nir_ubfe_imm(&b, color_y, 4, 4); - nir_ssa_def *da = nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 2), 1), - nir_iand_imm(&b, color_y, 1)); + nir_ssa_def *da = + nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 2), 1), nir_iand_imm(&b, color_y, 1)); nir_ssa_def *dist = etc2_distance_lookup(&b, da); nir_ssa_def *index = nir_ior(&b, lsb, msb); nir_store_var(&b, punchthrough, - nir_iand(&b, nir_load_var(&b, punchthrough), - nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)), + nir_iand(&b, nir_load_var(&b, punchthrough), nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)), 0x1); nir_push_if(&b, nir_ieq_imm(&b, index, 0)); { @@ -347,43 +312,38 @@ build_shader(struct radv_device *dev) nir_ssa_def *r0 = nir_ubfe_imm(&b, color_y, 27, 4); nir_ssa_def *g0 = nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 24, 3), 1), nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 20), 1)); - nir_ssa_def *b0 = nir_ior(&b, nir_ubfe_imm(&b, color_y, 15, 3), - nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 16), 8)); + nir_ssa_def *b0 = + nir_ior(&b, nir_ubfe_imm(&b, color_y, 15, 3), nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 16), 8)); nir_ssa_def *r2 = nir_ubfe_imm(&b, color_y, 11, 4); nir_ssa_def *g2 = nir_ubfe_imm(&b, color_y, 7, 4); nir_ssa_def *b2 = nir_ubfe_imm(&b, color_y, 3, 4); nir_ssa_def *da = nir_iand_imm(&b, color_y, 4); nir_ssa_def *db = nir_iand_imm(&b, color_y, 1); nir_ssa_def *d = nir_iadd(&b, da, nir_imul_imm(&b, db, 2)); - nir_ssa_def *d0 = - nir_iadd(&b, nir_ishl_imm(&b, r0, 16), nir_iadd(&b, nir_ishl_imm(&b, g0, 8), b0)); - nir_ssa_def *d2 = - nir_iadd(&b, nir_ishl_imm(&b, r2, 16), nir_iadd(&b, nir_ishl_imm(&b, g2, 8), b2)); + nir_ssa_def *d0 = nir_iadd(&b, nir_ishl_imm(&b, r0, 16), nir_iadd(&b, nir_ishl_imm(&b, g0, 8), b0)); + nir_ssa_def *d2 = nir_iadd(&b, 
nir_ishl_imm(&b, r2, 16), nir_iadd(&b, nir_ishl_imm(&b, g2, 8), b2)); d = nir_bcsel(&b, nir_uge(&b, d0, d2), nir_iadd_imm(&b, d, 1), d); nir_ssa_def *dist = etc2_distance_lookup(&b, d); - nir_ssa_def *base = nir_bcsel(&b, nir_ine_imm(&b, msb, 0), nir_vec3(&b, r2, g2, b2), - nir_vec3(&b, r0, g0, b0)); + nir_ssa_def *base = + nir_bcsel(&b, nir_ine_imm(&b, msb, 0), nir_vec3(&b, r2, g2, b2), nir_vec3(&b, r0, g0, b0)); base = etc_extend(&b, base, 4); - base = nir_iadd(&b, base, - nir_imul(&b, dist, nir_isub_imm(&b, 1, nir_imul_imm(&b, lsb, 2)))); + base = nir_iadd(&b, base, nir_imul(&b, dist, nir_isub_imm(&b, 1, nir_imul_imm(&b, lsb, 2)))); nir_store_var(&b, rgb_result, base, 0x7); nir_store_var(&b, punchthrough, - nir_iand(&b, nir_load_var(&b, punchthrough), - nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)), + nir_iand(&b, nir_load_var(&b, punchthrough), nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)), 0x1); } nir_push_else(&b, NULL); nir_push_if(&b, nir_ugt_imm(&b, b1, 31)); { nir_ssa_def *r0 = nir_ubfe_imm(&b, color_y, 25, 6); - nir_ssa_def *g0 = nir_ior(&b, nir_ubfe_imm(&b, color_y, 17, 6), - nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 18), 0x40)); - nir_ssa_def *b0 = - nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 11, 2), 3), - nir_ior(&b, nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 11), 0x20), - nir_ubfe_imm(&b, color_y, 7, 3))); - nir_ssa_def *rh = nir_ior(&b, nir_iand_imm(&b, color_y, 1), - nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 5), 1)); + nir_ssa_def *g0 = + nir_ior(&b, nir_ubfe_imm(&b, color_y, 17, 6), nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 18), 0x40)); + nir_ssa_def *b0 = nir_ior( + &b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 11, 2), 3), + nir_ior(&b, nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 11), 0x20), nir_ubfe_imm(&b, color_y, 7, 3))); + nir_ssa_def *rh = + nir_ior(&b, nir_iand_imm(&b, color_y, 1), nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 5), 1)); nir_ssa_def *rv = nir_ubfe_imm(&b, color_x, 13, 6); nir_ssa_def *gh = nir_ubfe_imm(&b, color_x, 25, 7); nir_ssa_def *gv = nir_ubfe_imm(&b, color_x, 6, 7); @@ -401,10 +361,10 @@ build_shader(struct radv_device *dev) bv = etc_extend(&b, bv, 6); nir_ssa_def *rgb = nir_vec3(&b, r0, g0, b0); - nir_ssa_def *dx = nir_imul(&b, nir_isub(&b, nir_vec3(&b, rh, gh, bh), rgb), - nir_channel(&b, pixel_coord, 0)); - nir_ssa_def *dy = nir_imul(&b, nir_isub(&b, nir_vec3(&b, rv, gv, bv), rgb), - nir_channel(&b, pixel_coord, 1)); + nir_ssa_def *dx = + nir_imul(&b, nir_isub(&b, nir_vec3(&b, rh, gh, bh), rgb), nir_channel(&b, pixel_coord, 0)); + nir_ssa_def *dy = + nir_imul(&b, nir_isub(&b, nir_vec3(&b, rv, gv, bv), rgb), nir_channel(&b, pixel_coord, 1)); rgb = nir_iadd(&b, rgb, nir_ishr_imm(&b, nir_iadd_imm(&b, nir_iadd(&b, dx, dy), 2), 2)); nir_store_var(&b, rgb_result, rgb, 0x7); nir_store_var(&b, punchthrough, nir_imm_false(&b), 0x1); @@ -427,16 +387,13 @@ build_shader(struct radv_device *dev) nir_pop_if(&b, NULL); nir_push_if(&b, nir_load_var(&b, etc1_compat)); { - nir_ssa_def *etc1_table_index = nir_ubfe( - &b, color_y, nir_isub_imm(&b, 5, nir_imul_imm(&b, subblock, 3)), nir_imm_int(&b, 3)); + nir_ssa_def *etc1_table_index = + nir_ubfe(&b, color_y, nir_isub_imm(&b, 5, nir_imul_imm(&b, subblock, 3)), nir_imm_int(&b, 3)); nir_ssa_def *sgn = nir_isub_imm(&b, 1, msb); sgn = nir_bcsel(&b, nir_load_var(&b, punchthrough), nir_imul(&b, sgn, lsb), sgn); nir_store_var(&b, punchthrough, - nir_iand(&b, nir_load_var(&b, punchthrough), - nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)), - 0x1); - nir_ssa_def *off = - nir_imul(&b, 
etc1_color_modifier_lookup(&b, etc1_table_index, lsb), sgn); + nir_iand(&b, nir_load_var(&b, punchthrough), nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)), 0x1); + nir_ssa_def *off = nir_imul(&b, etc1_color_modifier_lookup(&b, etc1_table_index, lsb), sgn); nir_ssa_def *result = nir_iadd(&b, nir_load_var(&b, base_rgb), off); nir_store_var(&b, rgb_result, result, 0x7); } @@ -449,8 +406,7 @@ build_shader(struct radv_device *dev) nir_pop_if(&b, NULL); nir_ssa_def *col[4]; for (unsigned i = 0; i < 3; ++i) - col[i] = nir_fdiv_imm(&b, nir_i2f32(&b, nir_channel(&b, nir_load_var(&b, rgb_result), i)), - 255.0); + col[i] = nir_fdiv_imm(&b, nir_i2f32(&b, nir_channel(&b, nir_load_var(&b, rgb_result), i)), 255.0); col[3] = nir_load_var(&b, alpha_result); nir_store_var(&b, color, nir_vec(&b, col, 4), 0xf); } @@ -460,8 +416,7 @@ build_shader(struct radv_device *dev) nir_ieq_imm(&b, format, VK_FORMAT_EAC_R11G11_SNORM_BLOCK)); nir_ssa_def *val[4]; for (int i = 0; i < 2; ++i) { - val[i] = decode_etc2_alpha(&b, nir_channels(&b, payload, 3 << (2 * i)), linear_pixel, true, - is_signed); + val[i] = decode_etc2_alpha(&b, nir_channels(&b, payload, 3 << (2 * i)), linear_pixel, true, is_signed); } val[2] = nir_imm_float(&b, 0.0); val[3] = nir_imm_float(&b, 1.0); @@ -475,15 +430,13 @@ build_shader(struct radv_device *dev) nir_push_if(&b, is_3d); { - nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_3d)->dest.ssa, img_coord, - nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0), - .image_dim = GLSL_SAMPLER_DIM_3D); + nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_3d)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32), + outval, nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_3D); } nir_push_else(&b, NULL); { - nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_2d)->dest.ssa, img_coord, - nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0), - .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true); + nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_2d)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32), + outval, nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true); } nir_pop_if(&b, NULL); return b.shader; @@ -493,25 +446,23 @@ static VkResult create_layout(struct radv_device *device) { VkResult result; - VkDescriptorSetLayoutCreateInfo ds_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 2, - .pBindings = (VkDescriptorSetLayoutBinding[]){ - {.binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - {.binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - }}; - - result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, - &device->meta_state.alloc, + VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 2, + .pBindings = (VkDescriptorSetLayoutBinding[]){ + {.binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + {.binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = 
VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + }}; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc, &device->meta_state.etc_decode.ds_layout); if (result != VK_SUCCESS) goto fail; @@ -524,9 +475,8 @@ create_layout(struct radv_device *device) .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20}, }; - result = - radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, - &device->meta_state.alloc, &device->meta_state.etc_decode.p_layout); + result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, + &device->meta_state.etc_decode.p_layout); if (result != VK_SUCCESS) goto fail; return VK_SUCCESS; @@ -564,8 +514,8 @@ create_decode_pipeline(struct radv_device *device, VkPipeline *pipeline) .layout = device->meta_state.resolve_compute.p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &vk_pipeline_info, NULL, pipeline); + result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, + NULL, pipeline); if (result != VK_SUCCESS) goto fail; @@ -602,10 +552,9 @@ radv_device_finish_meta_etc_decode_state(struct radv_device *device) { struct radv_meta_state *state = &device->meta_state; radv_DestroyPipeline(radv_device_to_handle(device), state->etc_decode.pipeline, &state->alloc); - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->etc_decode.p_layout, - &state->alloc); - device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), - state->etc_decode.ds_layout, &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->etc_decode.p_layout, &state->alloc); + device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->etc_decode.ds_layout, + &state->alloc); } static VkPipeline @@ -628,63 +577,59 @@ radv_get_etc_decode_pipeline(struct radv_cmd_buffer *cmd_buffer) } static void -decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview, - struct radv_image_view *dst_iview, const VkOffset3D *offset, const VkExtent3D *extent) +decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview, struct radv_image_view *dst_iview, + const VkOffset3D *offset, const VkExtent3D *extent) { struct radv_device *device = cmd_buffer->device; - radv_meta_push_descriptor_set( - cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout, - 0, /* set */ - 2, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = - (VkDescriptorImageInfo[]){ - {.sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(src_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL}, - }}, - {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]){ - { - .sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(dst_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - }}}); + radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + device->meta_state.resolve_compute.p_layout, 0, /* set */ + 2, /* descriptorWriteCount */ + 
(VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = + (VkDescriptorImageInfo[]){ + {.sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL}, + }}, + {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]){ + { + .sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(dst_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + }}}); VkPipeline pipeline = radv_get_etc_decode_pipeline(cmd_buffer); - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); unsigned push_constants[5] = { offset->x, offset->y, offset->z, src_iview->image->vk.format, src_iview->image->vk.image_type, }; - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.resolve_compute.p_layout, VK_SHADER_STAGE_COMPUTE_BIT, - 0, 20, push_constants); + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.resolve_compute.p_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, 20, push_constants); radv_unaligned_dispatch(cmd_buffer, extent->width, extent->height, extent->depth); } void -radv_meta_decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, - VkImageLayout layout, const VkImageSubresourceLayers *subresource, - VkOffset3D offset, VkExtent3D extent) +radv_meta_decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout layout, + const VkImageSubresourceLayers *subresource, VkOffset3D offset, VkExtent3D extent) { struct radv_meta_saved_state saved_state; radv_meta_save(&saved_state, cmd_buffer, - RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | - RADV_META_SAVE_DESCRIPTORS | RADV_META_SUSPEND_PREDICATING); + RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS | + RADV_META_SUSPEND_PREDICATING); uint32_t base_slice = radv_meta_get_iview_layer(image, subresource, &offset); uint32_t slice_count = image->vk.image_type == VK_IMAGE_TYPE_3D ? extent.depth : subresource->layerCount; @@ -692,27 +637,25 @@ radv_meta_decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *imag extent = vk_image_sanitize_extent(&image->vk, extent); offset = vk_image_sanitize_offset(&image->vk, offset); - VkFormat load_format = vk_format_get_blocksize(image->vk.format) == 16 - ? VK_FORMAT_R32G32B32A32_UINT - : VK_FORMAT_R32G32_UINT; + VkFormat load_format = + vk_format_get_blocksize(image->vk.format) == 16 ? 
VK_FORMAT_R32G32B32A32_UINT : VK_FORMAT_R32G32_UINT; struct radv_image_view src_iview; - radv_image_view_init( - &src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo){ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = radv_image_to_handle(image), - .viewType = radv_meta_get_view_type(image), - .format = load_format, - .subresourceRange = - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = subresource->mipLevel, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = subresource->baseArrayLayer + subresource->layerCount, - }, - }, - 0, NULL); + radv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo){ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = radv_image_to_handle(image), + .viewType = radv_meta_get_view_type(image), + .format = load_format, + .subresourceRange = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = subresource->mipLevel, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = subresource->baseArrayLayer + subresource->layerCount, + }, + }, + 0, NULL); VkFormat store_format; switch (image->vk.format) { @@ -732,23 +675,22 @@ radv_meta_decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *imag store_format = VK_FORMAT_R8G8B8A8_UNORM; } struct radv_image_view dst_iview; - radv_image_view_init( - &dst_iview, cmd_buffer->device, - &(VkImageViewCreateInfo){ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = radv_image_to_handle(image), - .viewType = radv_meta_get_view_type(image), - .format = store_format, - .subresourceRange = - { - .aspectMask = VK_IMAGE_ASPECT_PLANE_1_BIT, - .baseMipLevel = subresource->mipLevel, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = subresource->baseArrayLayer + subresource->layerCount, - }, - }, - 0, NULL); + radv_image_view_init(&dst_iview, cmd_buffer->device, + &(VkImageViewCreateInfo){ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = radv_image_to_handle(image), + .viewType = radv_meta_get_view_type(image), + .format = store_format, + .subresourceRange = + { + .aspectMask = VK_IMAGE_ASPECT_PLANE_1_BIT, + .baseMipLevel = subresource->mipLevel, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = subresource->baseArrayLayer + subresource->layerCount, + }, + }, + 0, NULL); decode_etc(cmd_buffer, &src_iview, &dst_iview, &(VkOffset3D){offset.x, offset.y, base_slice}, &(VkExtent3D){extent.width, extent.height, slice_count}); diff --git a/src/amd/vulkan/meta/radv_meta_fast_clear.c b/src/amd/vulkan/meta/radv_meta_fast_clear.c index d6027bf..3ece241 100644 --- a/src/amd/vulkan/meta/radv_meta_fast_clear.c +++ b/src/amd/vulkan/meta/radv_meta_fast_clear.c @@ -53,14 +53,12 @@ build_dcc_decompress_compute_shader(struct radv_device *dev) output_img->data.binding = 1; nir_ssa_def *global_id = get_global_ids(&b, 2); - nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, global_id, 0), - nir_channel(&b, global_id, 1), - nir_ssa_undef(&b, 1, 32), - nir_ssa_undef(&b, 1, 32)); + nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, global_id, 0), nir_channel(&b, global_id, 1), + nir_ssa_undef(&b, 1, 32), nir_ssa_undef(&b, 1, 32)); - nir_ssa_def *data = nir_image_deref_load( - &b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32), - nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D); + nir_ssa_def *data = + nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, img_coord, + nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D); /* 
We need a NIR_SCOPE_DEVICE memory_scope because ACO will avoid * creating a vmcnt(0) because it expects the L1 cache to keep memory @@ -69,9 +67,8 @@ build_dcc_decompress_compute_shader(struct radv_device *dev) nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP, .memory_scope = NIR_SCOPE_DEVICE, .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo); - nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, - nir_ssa_undef(&b, 1, 32), data, nir_imm_int(&b, 0), - .image_dim = GLSL_SAMPLER_DIM_2D); + nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32), data, + nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D); return b.shader; } @@ -81,26 +78,24 @@ create_dcc_compress_compute(struct radv_device *device) VkResult result = VK_SUCCESS; nir_shader *cs = build_dcc_decompress_compute_shader(device); - VkDescriptorSetLayoutCreateInfo ds_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 2, - .pBindings = (VkDescriptorSetLayoutBinding[]){ - {.binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - {.binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - }}; - - result = radv_CreateDescriptorSetLayout( - radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc, - &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout); + VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 2, + .pBindings = (VkDescriptorSetLayoutBinding[]){ + {.binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + {.binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + }}; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc, + &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout); if (result != VK_SUCCESS) goto cleanup; @@ -112,9 +107,8 @@ create_dcc_compress_compute(struct radv_device *device) .pPushConstantRanges = NULL, }; - result = radv_CreatePipelineLayout( - radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, - &device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout); + result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, + &device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout); if (result != VK_SUCCESS) goto cleanup; @@ -135,9 +129,8 @@ create_dcc_compress_compute(struct radv_device *device) .layout = device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout, }; - result = radv_compute_pipeline_create( - radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, NULL, - &device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline); + result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, + 
NULL, &device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline); if (result != VK_SUCCESS) goto cleanup; @@ -157,8 +150,7 @@ create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout) .pPushConstantRanges = NULL, }; - return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, - &device->meta_state.alloc, layout); + return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, layout); } static VkResult @@ -208,8 +200,8 @@ create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipeli .attachmentCount = 1, .pAttachments = (VkPipelineColorBlendAttachmentState[]){ { - .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | + VK_COLOR_COMPONENT_A_BIT, }, }}; const VkPipelineRasterizationStateCreateInfo rs_state = { @@ -228,103 +220,103 @@ create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipeli .pColorAttachmentFormats = &color_format, }; - result = radv_graphics_pipeline_create( - device_h, device->meta_state.cache, - &(VkGraphicsPipelineCreateInfo){ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = &rendering_create_info, - .stageCount = 2, - .pStages = stages, - - .pVertexInputState = &vi_state, - .pInputAssemblyState = &ia_state, - - .pViewportState = - &(VkPipelineViewportStateCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .scissorCount = 1, - }, - .pRasterizationState = &rs_state, - .pMultisampleState = - &(VkPipelineMultisampleStateCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .rasterizationSamples = 1, - .sampleShadingEnable = false, - .pSampleMask = NULL, - .alphaToCoverageEnable = false, - .alphaToOneEnable = false, - }, - .pColorBlendState = &blend_state, - .pDynamicState = - &(VkPipelineDynamicStateCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 2, - .pDynamicStates = - (VkDynamicState[]){ - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - }, - }, - .layout = layout, - .renderPass = VK_NULL_HANDLE, - .subpass = 0, - }, - &(struct radv_graphics_pipeline_create_info){ - .use_rectlist = true, - .custom_blend_mode = V_028808_CB_ELIMINATE_FAST_CLEAR, - }, - &device->meta_state.alloc, &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline); + result = radv_graphics_pipeline_create(device_h, device->meta_state.cache, + &(VkGraphicsPipelineCreateInfo){ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = &rendering_create_info, + .stageCount = 2, + .pStages = stages, + + .pVertexInputState = &vi_state, + .pInputAssemblyState = &ia_state, + + .pViewportState = + &(VkPipelineViewportStateCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, + .pRasterizationState = &rs_state, + .pMultisampleState = + &(VkPipelineMultisampleStateCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = 1, + .sampleShadingEnable = false, + .pSampleMask = NULL, + .alphaToCoverageEnable = false, + .alphaToOneEnable = false, + }, + .pColorBlendState = &blend_state, + .pDynamicState = + &(VkPipelineDynamicStateCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + 
.dynamicStateCount = 2, + .pDynamicStates = + (VkDynamicState[]){ + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + }, + }, + .layout = layout, + .renderPass = VK_NULL_HANDLE, + .subpass = 0, + }, + &(struct radv_graphics_pipeline_create_info){ + .use_rectlist = true, + .custom_blend_mode = V_028808_CB_ELIMINATE_FAST_CLEAR, + }, + &device->meta_state.alloc, + &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline); if (result != VK_SUCCESS) goto cleanup; - result = radv_graphics_pipeline_create( - device_h, device->meta_state.cache, - &(VkGraphicsPipelineCreateInfo){ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = &rendering_create_info, - .stageCount = 2, - .pStages = stages, - - .pVertexInputState = &vi_state, - .pInputAssemblyState = &ia_state, - - .pViewportState = - &(VkPipelineViewportStateCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .scissorCount = 1, - }, - .pRasterizationState = &rs_state, - .pMultisampleState = - &(VkPipelineMultisampleStateCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .rasterizationSamples = 1, - .sampleShadingEnable = false, - .pSampleMask = NULL, - .alphaToCoverageEnable = false, - .alphaToOneEnable = false, - }, - .pColorBlendState = &blend_state, - .pDynamicState = - &(VkPipelineDynamicStateCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 2, - .pDynamicStates = - (VkDynamicState[]){ - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - }, - }, - .layout = layout, - .renderPass = VK_NULL_HANDLE, - .subpass = 0, - }, - &(struct radv_graphics_pipeline_create_info){ - .use_rectlist = true, - .custom_blend_mode = V_028808_CB_FMASK_DECOMPRESS, - }, - &device->meta_state.alloc, &device->meta_state.fast_clear_flush.fmask_decompress_pipeline); + result = radv_graphics_pipeline_create(device_h, device->meta_state.cache, + &(VkGraphicsPipelineCreateInfo){ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = &rendering_create_info, + .stageCount = 2, + .pStages = stages, + + .pVertexInputState = &vi_state, + .pInputAssemblyState = &ia_state, + + .pViewportState = + &(VkPipelineViewportStateCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, + .pRasterizationState = &rs_state, + .pMultisampleState = + &(VkPipelineMultisampleStateCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = 1, + .sampleShadingEnable = false, + .pSampleMask = NULL, + .alphaToCoverageEnable = false, + .alphaToOneEnable = false, + }, + .pColorBlendState = &blend_state, + .pDynamicState = + &(VkPipelineDynamicStateCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 2, + .pDynamicStates = + (VkDynamicState[]){ + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + }, + }, + .layout = layout, + .renderPass = VK_NULL_HANDLE, + .subpass = 0, + }, + &(struct radv_graphics_pipeline_create_info){ + .use_rectlist = true, + .custom_blend_mode = V_028808_CB_FMASK_DECOMPRESS, + }, + &device->meta_state.alloc, + &device->meta_state.fast_clear_flush.fmask_decompress_pipeline); if (result != VK_SUCCESS) goto cleanup; @@ -372,9 +364,8 @@ create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipeli }, &(struct radv_graphics_pipeline_create_info){ .use_rectlist = true, - .custom_blend_mode = 
device->physical_device->rad_info.gfx_level >= GFX11 - ? V_028808_CB_DCC_DECOMPRESS_GFX11 - : V_028808_CB_DCC_DECOMPRESS_GFX8, + .custom_blend_mode = device->physical_device->rad_info.gfx_level >= GFX11 ? V_028808_CB_DCC_DECOMPRESS_GFX11 + : V_028808_CB_DCC_DECOMPRESS_GFX8, }, &device->meta_state.alloc, &device->meta_state.fast_clear_flush.dcc_decompress_pipeline); if (result != VK_SUCCESS) @@ -390,23 +381,18 @@ radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device) { struct radv_meta_state *state = &device->meta_state; - radv_DestroyPipeline(radv_device_to_handle(device), - state->fast_clear_flush.dcc_decompress_pipeline, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->fast_clear_flush.fmask_decompress_pipeline, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->fast_clear_flush.cmask_eliminate_pipeline, &state->alloc); - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fast_clear_flush.p_layout, - &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->fast_clear_flush.dcc_decompress_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->fast_clear_flush.fmask_decompress_pipeline, + &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->fast_clear_flush.cmask_eliminate_pipeline, &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fast_clear_flush.p_layout, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->fast_clear_flush.dcc_decompress_compute_pipeline, &state->alloc); - radv_DestroyPipelineLayout(radv_device_to_handle(device), - state->fast_clear_flush.dcc_decompress_compute_p_layout, + radv_DestroyPipeline(radv_device_to_handle(device), state->fast_clear_flush.dcc_decompress_compute_pipeline, + &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fast_clear_flush.dcc_decompress_compute_p_layout, &state->alloc); device->vk.dispatch_table.DestroyDescriptorSetLayout( - radv_device_to_handle(device), state->fast_clear_flush.dcc_decompress_compute_ds_layout, - &state->alloc); + radv_device_to_handle(device), state->fast_clear_flush.dcc_decompress_compute_ds_layout, &state->alloc); } static VkResult @@ -457,9 +443,8 @@ radv_device_init_meta_fast_clear_flush_state(struct radv_device *device, bool on } static void -radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, uint64_t pred_offset, - bool value) +radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, + uint64_t pred_offset, bool value) { uint64_t va = 0; @@ -473,8 +458,7 @@ radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer, static void radv_process_color_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, - const VkImageSubresourceRange *range, int level, int layer, - bool flush_cb) + const VkImageSubresourceRange *range, int level, int layer, bool flush_cb) { struct radv_device *device = cmd_buffer->device; struct radv_image_view iview; @@ -510,10 +494,7 @@ radv_process_color_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_i const VkRenderingInfo rendering_info = { .sType = VK_STRUCTURE_TYPE_RENDERING_INFO, - .renderArea = { - .offset = { 0, 0 }, - .extent = { width, height } - }, + .renderArea = {.offset = {0, 0}, .extent = {width, height}}, .layerCount = 1, .colorAttachmentCount = 1, .pColorAttachments = 
&color_att, @@ -522,14 +503,12 @@ radv_process_color_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_i radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info); if (flush_cb) - cmd_buffer->state.flush_bits |= - radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image); + cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image); radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); if (flush_cb) - cmd_buffer->state.flush_bits |= - radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image); + cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image); radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer)); @@ -590,8 +569,7 @@ radv_process_color_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image * } } - radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | - RADV_META_SAVE_RENDER); + radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_RENDER); if (pred_offset) { pred_offset += 8 * subresourceRange->baseMipLevel; @@ -602,8 +580,7 @@ radv_process_color_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image * cmd_buffer->state.predicating = true; } - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, - *pipeline); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); for (uint32_t l = 0; l < vk_image_subresource_level_count(&image->vk, subresourceRange); ++l) { uint32_t width, height; @@ -615,13 +592,9 @@ radv_process_color_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image * width = radv_minify(image->vk.extent.width, subresourceRange->baseMipLevel + l); height = radv_minify(image->vk.extent.height, subresourceRange->baseMipLevel + l); - radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, - &(VkViewport){.x = 0, - .y = 0, - .width = width, - .height = height, - .minDepth = 0.0f, - .maxDepth = 1.0f}); + radv_CmdSetViewport( + radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, + &(VkViewport){.x = 0, .y = 0, .width = width, .height = height, .minDepth = 0.0f, .maxDepth = 1.0f}); radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D){ @@ -634,8 +607,7 @@ radv_process_color_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image * } } - cmd_buffer->state.flush_bits |= - RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; if (pred_offset) { pred_offset += 8 * subresourceRange->baseMipLevel; @@ -646,8 +618,7 @@ radv_process_color_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image * if (cmd_buffer->state.predication_type != -1) { /* Restore previous conditional rendering user state. 
*/ - si_emit_set_predication_state(cmd_buffer, cmd_buffer->state.predication_type, - cmd_buffer->state.predication_op, + si_emit_set_predication_state(cmd_buffer, cmd_buffer->state.predication_type, cmd_buffer->state.predication_op, cmd_buffer->state.predication_va); } } @@ -719,8 +690,7 @@ radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_imag struct radv_image_view store_iview = {0}; struct radv_device *device = cmd_buffer->device; - cmd_buffer->state.flush_bits |= - radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); + cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); if (!cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) { VkResult ret = radv_device_init_meta_fast_clear_flush_state_internal(cmd_buffer->device); @@ -730,8 +700,7 @@ radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_imag } } - radv_meta_save(&saved_state, cmd_buffer, - RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE); + radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE); radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline); @@ -747,39 +716,37 @@ radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_imag height = radv_minify(image->vk.extent.height, subresourceRange->baseMipLevel + l); for (uint32_t s = 0; s < vk_image_subresource_layer_count(&image->vk, subresourceRange); s++) { - radv_image_view_init( - &load_iview, cmd_buffer->device, - &(VkImageViewCreateInfo){ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = radv_image_to_handle(image), - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = image->vk.format, - .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = subresourceRange->baseMipLevel + l, - .levelCount = 1, - .baseArrayLayer = subresourceRange->baseArrayLayer + s, - .layerCount = 1}, - }, - 0, &(struct radv_image_view_extra_create_info){.enable_compression = true}); - radv_image_view_init( - &store_iview, cmd_buffer->device, - &(VkImageViewCreateInfo){ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = radv_image_to_handle(image), - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = image->vk.format, - .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = subresourceRange->baseMipLevel + l, - .levelCount = 1, - .baseArrayLayer = subresourceRange->baseArrayLayer + s, - .layerCount = 1}, - }, - 0, &(struct radv_image_view_extra_create_info){.disable_compression = true}); + radv_image_view_init(&load_iview, cmd_buffer->device, + &(VkImageViewCreateInfo){ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = radv_image_to_handle(image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = image->vk.format, + .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = subresourceRange->baseMipLevel + l, + .levelCount = 1, + .baseArrayLayer = subresourceRange->baseArrayLayer + s, + .layerCount = 1}, + }, + 0, &(struct radv_image_view_extra_create_info){.enable_compression = true}); + radv_image_view_init(&store_iview, cmd_buffer->device, + &(VkImageViewCreateInfo){ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = radv_image_to_handle(image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = image->vk.format, + .subresourceRange = {.aspectMask = 
VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = subresourceRange->baseMipLevel + l, + .levelCount = 1, + .baseArrayLayer = subresourceRange->baseArrayLayer + s, + .layerCount = 1}, + }, + 0, &(struct radv_image_view_extra_create_info){.disable_compression = true}); radv_meta_push_descriptor_set( cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout, 0, /* set */ - 2, /* descriptorWriteCount */ + 2, /* descriptorWriteCount */ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .dstBinding = 0, .dstArrayElement = 0, @@ -818,9 +785,8 @@ radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_imag radv_meta_restore(&saved_state, cmd_buffer); - cmd_buffer->state.flush_bits |= - RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | - radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | + radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); /* Initialize the DCC metadata as "fully expanded". */ cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, image, subresourceRange, 0xffffffff); diff --git a/src/amd/vulkan/meta/radv_meta_fmask_copy.c b/src/amd/vulkan/meta/radv_meta_fmask_copy.c index 3c6e652..0bae313 100644 --- a/src/amd/vulkan/meta/radv_meta_fmask_copy.c +++ b/src/amd/vulkan/meta/radv_meta_fmask_copy.c @@ -29,8 +29,7 @@ build_fmask_copy_compute_shader(struct radv_device *dev, int samples) const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT); const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_MS, false, GLSL_TYPE_FLOAT); - nir_builder b = - radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_fmask_copy_cs_-%d", samples); + nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_fmask_copy_cs_-%d", samples); b.shader->info.workgroup_size[0] = 8; b.shader->info.workgroup_size[1] = 8; @@ -45,34 +44,30 @@ build_fmask_copy_compute_shader(struct radv_device *dev, int samples) nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32); - nir_ssa_def *block_size = - nir_imm_ivec3(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1], - b.shader->info.workgroup_size[2]); + nir_ssa_def *block_size = nir_imm_ivec3(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1], + b.shader->info.workgroup_size[2]); nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); /* Get coordinates. */ nir_ssa_def *src_coord = nir_trim_vector(&b, global_id, 2); - nir_ssa_def *dst_coord = nir_vec4(&b, nir_channel(&b, src_coord, 0), - nir_channel(&b, src_coord, 1), - nir_ssa_undef(&b, 1, 32), - nir_ssa_undef(&b, 1, 32)); + nir_ssa_def *dst_coord = nir_vec4(&b, nir_channel(&b, src_coord, 0), nir_channel(&b, src_coord, 1), + nir_ssa_undef(&b, 1, 32), nir_ssa_undef(&b, 1, 32)); nir_tex_src frag_mask_srcs[] = {{ .src_type = nir_tex_src_coord, .src = nir_src_for_ssa(src_coord), }}; - nir_ssa_def *frag_mask = nir_build_tex_deref_instr(&b, nir_texop_fragment_mask_fetch_amd, - nir_build_deref_var(&b, input_img), NULL, - ARRAY_SIZE(frag_mask_srcs), frag_mask_srcs); + nir_ssa_def *frag_mask = + nir_build_tex_deref_instr(&b, nir_texop_fragment_mask_fetch_amd, nir_build_deref_var(&b, input_img), NULL, + ARRAY_SIZE(frag_mask_srcs), frag_mask_srcs); /* Get the maximum sample used in this fragment. 
*/ nir_ssa_def *max_sample_index = nir_imm_int(&b, 0); for (uint32_t s = 0; s < samples; s++) { /* max_sample_index = MAX2(max_sample_index, (frag_mask >> (s * 4)) & 0xf) */ max_sample_index = nir_umax(&b, max_sample_index, - nir_ubitfield_extract(&b, frag_mask, nir_imm_int(&b, 4 * s), - nir_imm_int(&b, 4))); + nir_ubitfield_extract(&b, frag_mask, nir_imm_int(&b, 4 * s), nir_imm_int(&b, 4))); } nir_variable *counter = nir_local_variable_create(b.impl, glsl_int_type(), "counter"); @@ -83,19 +78,19 @@ build_fmask_copy_compute_shader(struct radv_device *dev, int samples) nir_ssa_def *sample_id = nir_load_var(&b, counter); nir_tex_src frag_fetch_srcs[] = {{ - .src_type = nir_tex_src_coord, - .src = nir_src_for_ssa(src_coord), - }, { - .src_type = nir_tex_src_ms_index, - .src = nir_src_for_ssa(sample_id), - }}; - nir_ssa_def *outval = nir_build_tex_deref_instr(&b, nir_texop_fragment_fetch_amd, - nir_build_deref_var(&b, input_img), NULL, - ARRAY_SIZE(frag_fetch_srcs), frag_fetch_srcs); - - nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord, - sample_id, outval, nir_imm_int(&b, 0), - .image_dim = GLSL_SAMPLER_DIM_MS); + .src_type = nir_tex_src_coord, + .src = nir_src_for_ssa(src_coord), + }, + { + .src_type = nir_tex_src_ms_index, + .src = nir_src_for_ssa(sample_id), + }}; + nir_ssa_def *outval = + nir_build_tex_deref_instr(&b, nir_texop_fragment_fetch_amd, nir_build_deref_var(&b, input_img), NULL, + ARRAY_SIZE(frag_fetch_srcs), frag_fetch_srcs); + + nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord, sample_id, outval, + nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_MS); radv_break_on_count(&b, counter, max_sample_index); } @@ -109,10 +104,9 @@ radv_device_finish_meta_fmask_copy_state(struct radv_device *device) { struct radv_meta_state *state = &device->meta_state; - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fmask_copy.p_layout, - &state->alloc); - device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), - state->fmask_copy.ds_layout, &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fmask_copy.p_layout, &state->alloc); + device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->fmask_copy.ds_layout, + &state->alloc); for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) { radv_DestroyPipeline(radv_device_to_handle(device), state->fmask_copy.pipeline[i], &state->alloc); @@ -141,8 +135,8 @@ create_fmask_copy_pipeline(struct radv_device *device, int samples, VkPipeline * .layout = state->fmask_copy.p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, - &vk_pipeline_info, NULL, pipeline); + result = + radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, &vk_pipeline_info, NULL, pipeline); ralloc_free(cs); return result; } @@ -173,30 +167,26 @@ radv_device_init_meta_fmask_copy_state_internal(struct radv_device *device, uint .pImmutableSamplers = NULL}, }}; - result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, - &device->meta_state.alloc, + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc, &device->meta_state.fmask_copy.ds_layout); if (result != VK_SUCCESS) return result; } if (!device->meta_state.fmask_copy.p_layout) { - VkPipelineLayoutCreateInfo pl_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .setLayoutCount = 1, - 
.pSetLayouts = &device->meta_state.fmask_copy.ds_layout, - .pushConstantRangeCount = 0, - .pPushConstantRanges = NULL}; - - result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, - &device->meta_state.alloc, + VkPipelineLayoutCreateInfo pl_create_info = {.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.fmask_copy.ds_layout, + .pushConstantRangeCount = 0, + .pPushConstantRanges = NULL}; + + result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, &device->meta_state.fmask_copy.p_layout); if (result != VK_SUCCESS) return result; } - return create_fmask_copy_pipeline(device, 1u << samples_log2, - &device->meta_state.fmask_copy.pipeline[samples_log2]); + return create_fmask_copy_pipeline(device, 1u << samples_log2, &device->meta_state.fmask_copy.pipeline[samples_log2]); } VkResult @@ -225,23 +215,22 @@ radv_fixup_copy_dst_metadata(struct radv_cmd_buffer *cmd_buffer, const struct ra assert(src_image->planes[0].surface.cmask_size == dst_image->planes[0].surface.cmask_size && src_image->planes[0].surface.fmask_size == dst_image->planes[0].surface.fmask_size); assert(src_image->planes[0].surface.fmask_offset + src_image->planes[0].surface.fmask_size == - src_image->planes[0].surface.cmask_offset && + src_image->planes[0].surface.cmask_offset && dst_image->planes[0].surface.fmask_offset + dst_image->planes[0].surface.fmask_size == - dst_image->planes[0].surface.cmask_offset); + dst_image->planes[0].surface.cmask_offset); /* Copy CMASK+FMASK. */ size = src_image->planes[0].surface.cmask_size + src_image->planes[0].surface.fmask_size; src_offset = src_image->bindings[0].offset + src_image->planes[0].surface.fmask_offset; dst_offset = dst_image->bindings[0].offset + dst_image->planes[0].surface.fmask_offset; - radv_copy_buffer(cmd_buffer, src_image->bindings[0].bo, dst_image->bindings[0].bo, - src_offset, dst_offset, size); + radv_copy_buffer(cmd_buffer, src_image->bindings[0].bo, dst_image->bindings[0].bo, src_offset, dst_offset, size); } bool -radv_can_use_fmask_copy(struct radv_cmd_buffer *cmd_buffer, - const struct radv_image *src_image, const struct radv_image *dst_image, - unsigned num_rects, const struct radv_meta_blit2d_rect *rects) +radv_can_use_fmask_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image, + const struct radv_image *dst_image, unsigned num_rects, + const struct radv_meta_blit2d_rect *rects) { /* TODO: Test on pre GFX10 chips. */ if (cmd_buffer->device->physical_device->rad_info.gfx_level < GFX10) @@ -271,10 +260,9 @@ radv_can_use_fmask_copy(struct radv_cmd_buffer *cmd_buffer, return false; /* Source/destination images must have identical swizzle. 
*/ - if (src_image->planes[0].surface.fmask_tile_swizzle != - dst_image->planes[0].surface.fmask_tile_swizzle || + if (src_image->planes[0].surface.fmask_tile_swizzle != dst_image->planes[0].surface.fmask_tile_swizzle || src_image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode != - dst_image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode) + dst_image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode) return false; return true; @@ -332,31 +320,30 @@ radv_fmask_copy(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf }, 0, NULL); - radv_meta_push_descriptor_set( - cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - cmd_buffer->device->meta_state.fmask_copy.p_layout, 0, /* set */ - 2, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = - (VkDescriptorImageInfo[]){ - {.sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(&src_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL}, - }}, - {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]){ - {.sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(&dst_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL}, - }}}); + radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + cmd_buffer->device->meta_state.fmask_copy.p_layout, 0, /* set */ + 2, /* descriptorWriteCount */ + (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = + (VkDescriptorImageInfo[]){ + {.sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(&src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL}, + }}, + {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]){ + {.sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(&dst_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL}, + }}}); radv_unaligned_dispatch(cmd_buffer, src->image->vk.extent.width, src->image->vk.extent.height, 1); diff --git a/src/amd/vulkan/meta/radv_meta_fmask_expand.c b/src/amd/vulkan/meta/radv_meta_fmask_expand.c index 189b13c..a316a4e 100644 --- a/src/amd/vulkan/meta/radv_meta_fmask_expand.c +++ b/src/amd/vulkan/meta/radv_meta_fmask_expand.c @@ -26,18 +26,15 @@ #include "radv_private.h" #include "vk_format.h" -static VkResult radv_device_init_meta_fmask_expand_state_internal(struct radv_device *device, - uint32_t samples_log2); +static VkResult radv_device_init_meta_fmask_expand_state_internal(struct radv_device *device, uint32_t samples_log2); static nir_shader * build_fmask_expand_compute_shader(struct radv_device *device, int samples) { - const struct glsl_type *type = - glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, GLSL_TYPE_FLOAT); + const struct glsl_type *type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, GLSL_TYPE_FLOAT); const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_MS, true, GLSL_TYPE_FLOAT); - nir_builder b = - radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "meta_fmask_expand_cs-%d", samples); + nir_builder b = 
radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "meta_fmask_expand_cs-%d", samples); b.shader->info.workgroup_size[0] = 8; b.shader->info.workgroup_size[1] = 8; @@ -60,13 +57,12 @@ build_fmask_expand_compute_shader(struct radv_device *device, int samples) tex_vals[i] = nir_txf_ms_deref(&b, input_img_deref, tex_coord, nir_imm_int(&b, i)); } - nir_ssa_def *img_coord = - nir_vec4(&b, nir_channel(&b, tex_coord, 0), nir_channel(&b, tex_coord, 1), - nir_channel(&b, tex_coord, 2), nir_ssa_undef(&b, 1, 32)); + nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, tex_coord, 0), nir_channel(&b, tex_coord, 1), + nir_channel(&b, tex_coord, 2), nir_ssa_undef(&b, 1, 32)); for (uint32_t i = 0; i < samples; i++) { - nir_image_deref_store(&b, output_img_deref, img_coord, nir_imm_int(&b, i), tex_vals[i], - nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_MS, .image_array = true); + nir_image_deref_store(&b, output_img_deref, img_coord, nir_imm_int(&b, i), tex_vals[i], nir_imm_int(&b, 0), + .image_dim = GLSL_SAMPLER_DIM_MS, .image_array = true); } return b.shader; @@ -89,16 +85,14 @@ radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_ return; } - radv_meta_save(&saved_state, cmd_buffer, - RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS); + radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS); VkPipeline pipeline = device->meta_state.fmask_expand.pipeline[samples_log2]; - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); - cmd_buffer->state.flush_bits |= radv_dst_access_flush( - cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT, image); + cmd_buffer->state.flush_bits |= + radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT, image); radv_image_view_init(&iview, device, &(VkImageViewCreateInfo){ @@ -117,31 +111,30 @@ radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_ }, 0, NULL); - radv_meta_push_descriptor_set( - cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - cmd_buffer->device->meta_state.fmask_expand.p_layout, 0, /* set */ - 2, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = - (VkDescriptorImageInfo[]){ - {.sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(&iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL}, - }}, - {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]){ - {.sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(&iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL}, - }}}); + radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + cmd_buffer->device->meta_state.fmask_expand.p_layout, 0, /* set */ + 2, /* descriptorWriteCount */ + (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = + (VkDescriptorImageInfo[]){ + {.sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(&iview), + .imageLayout = 
VK_IMAGE_LAYOUT_GENERAL}, + }}, + {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]){ + {.sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(&iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL}, + }}}); radv_unaligned_dispatch(cmd_buffer, image->vk.extent.width, image->vk.extent.height, layer_count); @@ -150,8 +143,7 @@ radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_ radv_meta_restore(&saved_state, cmd_buffer); cmd_buffer->state.flush_bits |= - RADV_CMD_FLAG_CS_PARTIAL_FLUSH | - radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); + RADV_CMD_FLAG_CS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); /* Re-initialize FMASK in fully expanded mode. */ cmd_buffer->state.flush_bits |= radv_init_fmask(cmd_buffer, image, subresourceRange); @@ -163,14 +155,12 @@ radv_device_finish_meta_fmask_expand_state(struct radv_device *device) struct radv_meta_state *state = &device->meta_state; for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) { - radv_DestroyPipeline(radv_device_to_handle(device), state->fmask_expand.pipeline[i], - &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->fmask_expand.pipeline[i], &state->alloc); } - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fmask_expand.p_layout, - &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fmask_expand.p_layout, &state->alloc); - device->vk.dispatch_table.DestroyDescriptorSetLayout( - radv_device_to_handle(device), state->fmask_expand.ds_layout, &state->alloc); + device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->fmask_expand.ds_layout, + &state->alloc); } static VkResult @@ -196,8 +186,8 @@ create_fmask_expand_pipeline(struct radv_device *device, int samples, VkPipeline .layout = state->fmask_expand.p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, - &vk_pipeline_info, NULL, pipeline); + result = + radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, &vk_pipeline_info, NULL, pipeline); ralloc_free(cs); return result; @@ -230,8 +220,8 @@ radv_device_init_meta_fmask_expand_state_internal(struct radv_device *device, ui .pImmutableSamplers = NULL}, }}; - result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, - &state->alloc, &state->fmask_expand.ds_layout); + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &state->alloc, + &state->fmask_expand.ds_layout); if (result != VK_SUCCESS) return result; } @@ -245,14 +235,13 @@ radv_device_init_meta_fmask_expand_state_internal(struct radv_device *device, ui .pPushConstantRanges = NULL, }; - result = radv_CreatePipelineLayout(radv_device_to_handle(device), &color_create_info, - &state->alloc, &state->fmask_expand.p_layout); + result = radv_CreatePipelineLayout(radv_device_to_handle(device), &color_create_info, &state->alloc, + &state->fmask_expand.p_layout); if (result != VK_SUCCESS) return result; } - result = create_fmask_expand_pipeline(device, 1 << samples_log2, - &state->fmask_expand.pipeline[samples_log2]); + result = create_fmask_expand_pipeline(device, 1 << samples_log2, &state->fmask_expand.pipeline[samples_log2]); return result; } diff --git a/src/amd/vulkan/meta/radv_meta_resolve.c 
b/src/amd/vulkan/meta/radv_meta_resolve.c index 3893f9a..8d29538 100644 --- a/src/amd/vulkan/meta/radv_meta_resolve.c +++ b/src/amd/vulkan/meta/radv_meta_resolve.c @@ -47,8 +47,7 @@ build_nir_fs(struct radv_device *dev) } static VkResult -create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkFormat format, - VkPipeline *pipeline) +create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkFormat format, VkPipeline *pipeline) { VkResult result; VkDevice device_h = radv_device_to_handle(device); @@ -69,14 +68,13 @@ create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkFormat }; if (!device->meta_state.resolve.p_layout) { - result = - radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, - &device->meta_state.alloc, &device->meta_state.resolve.p_layout); + result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, + &device->meta_state.resolve.p_layout); if (result != VK_SUCCESS) goto cleanup; } - VkFormat color_formats[2] = { format, format }; + VkFormat color_formats[2] = {format, format}; const VkPipelineRenderingCreateInfo rendering_create_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO, .colorAttachmentCount = 2, @@ -191,11 +189,9 @@ radv_device_finish_meta_resolve_state(struct radv_device *device) struct radv_meta_state *state = &device->meta_state; for (uint32_t j = 0; j < NUM_META_FS_KEYS; j++) { - radv_DestroyPipeline(radv_device_to_handle(device), state->resolve.pipeline[j], - &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->resolve.pipeline[j], &state->alloc); } - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve.p_layout, - &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve.p_layout, &state->alloc); } VkResult @@ -230,8 +226,8 @@ cleanup: } static void -emit_resolve(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image, - const struct radv_image *dst_image, VkFormat vk_format) +emit_resolve(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image, const struct radv_image *dst_image, + VkFormat vk_format) { struct radv_device *device = cmd_buffer->device; VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); @@ -242,12 +238,10 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_im radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT, src_image) | radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, dst_image); - radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.resolve.pipeline[fs_key]); + radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, device->meta_state.resolve.pipeline[fs_key]); radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0); - cmd_buffer->state.flush_bits |= - radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, dst_image); + cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, dst_image); } enum radv_resolve_method { @@ -257,28 +251,22 @@ enum radv_resolve_method { }; static bool -image_hw_resolve_compat(const struct radv_device *device, struct radv_image *src_image, - struct radv_image *dst_image) +image_hw_resolve_compat(const struct radv_device *device, struct radv_image *src_image, struct radv_image *dst_image) { if (device->physical_device->rad_info.gfx_level >= GFX9) { - return 
dst_image->planes[0].surface.u.gfx9.swizzle_mode == - src_image->planes[0].surface.u.gfx9.swizzle_mode; + return dst_image->planes[0].surface.u.gfx9.swizzle_mode == src_image->planes[0].surface.u.gfx9.swizzle_mode; } else { - return dst_image->planes[0].surface.micro_tile_mode == - src_image->planes[0].surface.micro_tile_mode; + return dst_image->planes[0].surface.micro_tile_mode == src_image->planes[0].surface.micro_tile_mode; } } static void -radv_pick_resolve_method_images(struct radv_device *device, struct radv_image *src_image, - VkFormat src_format, struct radv_image *dst_image, - unsigned dst_level, VkImageLayout dst_image_layout, - struct radv_cmd_buffer *cmd_buffer, - enum radv_resolve_method *method) +radv_pick_resolve_method_images(struct radv_device *device, struct radv_image *src_image, VkFormat src_format, + struct radv_image *dst_image, unsigned dst_level, VkImageLayout dst_image_layout, + struct radv_cmd_buffer *cmd_buffer, enum radv_resolve_method *method) { - uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, - cmd_buffer->qf); + uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf); if (vk_format_is_color(src_format)) { /* Using the fragment resolve path is currently a hint to @@ -286,8 +274,7 @@ radv_pick_resolve_method_images(struct radv_device *device, struct radv_image *s * re-initialize it after resolving using compute. * TODO: Add support for layered and int to the fragment path. */ - if (radv_layout_dcc_compressed(device, dst_image, dst_level, dst_image_layout, - queue_mask)) { + if (radv_layout_dcc_compressed(device, dst_image, dst_level, dst_image_layout, queue_mask)) { *method = RESOLVE_FRAGMENT; } else if (!image_hw_resolve_compat(device, src_image, dst_image)) { /* The micro tile mode only needs to match for the HW @@ -364,11 +351,9 @@ radv_meta_resolve_hardware_image(struct radv_cmd_buffer *cmd_buffer, struct radv assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount); - const uint32_t src_base_layer = - radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset); + const uint32_t src_base_layer = radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset); - const uint32_t dst_base_layer = - radv_meta_get_iview_layer(dst_image, &region->dstSubresource, &region->dstOffset); + const uint32_t dst_base_layer = radv_meta_get_iview_layer(dst_image, &region->dstSubresource, &region->dstOffset); /** * From Vulkan 1.0.6 spec: 18.6 Resolving Multisample Images @@ -386,11 +371,10 @@ radv_meta_resolve_hardware_image(struct radv_cmd_buffer *cmd_buffer, struct radv const struct VkExtent3D extent = vk_image_sanitize_extent(&src_image->vk, region->extent); const struct VkOffset3D dstOffset = vk_image_sanitize_offset(&dst_image->vk, region->dstOffset); - uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, - cmd_buffer->qf); + uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf); - if (radv_layout_dcc_compressed(cmd_buffer->device, dst_image, region->dstSubresource.mipLevel, - dst_image_layout, queue_mask)) { + if (radv_layout_dcc_compressed(cmd_buffer->device, dst_image, region->dstSubresource.mipLevel, dst_image_layout, + queue_mask)) { VkImageSubresourceRange range = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = region->dstSubresource.mipLevel, @@ -403,8 +387,8 @@ radv_meta_resolve_hardware_image(struct
radv_cmd_buffer *cmd_buffer, struct radv } VkRect2D resolve_area = { - .offset = { dstOffset.x, dstOffset.y }, - .extent = { extent.width, extent.height }, + .offset = {dstOffset.x, dstOffset.y}, + .extent = {extent.width, extent.height}, }; radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, @@ -500,27 +484,24 @@ radv_meta_resolve_hardware_image(struct radv_cmd_buffer *cmd_buffer, struct radv } static void -resolve_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, - VkImageLayout src_image_layout, struct radv_image *dst_image, - VkImageLayout dst_image_layout, const VkImageResolve2 *region, +resolve_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkImageLayout src_image_layout, + struct radv_image *dst_image, VkImageLayout dst_image_layout, const VkImageResolve2 *region, enum radv_resolve_method resolve_method) { switch (resolve_method) { case RESOLVE_HW: - radv_meta_resolve_hardware_image(cmd_buffer, src_image, src_image_layout, dst_image, - dst_image_layout, region); + radv_meta_resolve_hardware_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout, region); break; case RESOLVE_FRAGMENT: radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout, region); - radv_meta_resolve_fragment_image(cmd_buffer, src_image, src_image_layout, dst_image, - dst_image_layout, region); + radv_meta_resolve_fragment_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout, region); break; case RESOLVE_COMPUTE: radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout, region); - radv_meta_resolve_compute_image(cmd_buffer, src_image, src_image->vk.format, src_image_layout, - dst_image, dst_image->vk.format, dst_image_layout, region); + radv_meta_resolve_compute_image(cmd_buffer, src_image, src_image->vk.format, src_image_layout, dst_image, + dst_image->vk.format, dst_image_layout, region); break; default: assert(!"Invalid resolve method selected"); @@ -528,8 +509,7 @@ resolve_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, } VKAPI_ATTR void VKAPI_CALL -radv_CmdResolveImage2(VkCommandBuffer commandBuffer, - const VkResolveImageInfo2 *pResolveImageInfo) +radv_CmdResolveImage2(VkCommandBuffer commandBuffer, const VkResolveImageInfo2 *pResolveImageInfo) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_image, src_image, pResolveImageInfo->srcImage); @@ -537,16 +517,15 @@ radv_CmdResolveImage2(VkCommandBuffer commandBuffer, VkImageLayout src_image_layout = pResolveImageInfo->srcImageLayout; VkImageLayout dst_image_layout = pResolveImageInfo->dstImageLayout; const struct radv_physical_device *pdevice = cmd_buffer->device->physical_device; - enum radv_resolve_method resolve_method = - pdevice->rad_info.gfx_level >= GFX11 ? RESOLVE_FRAGMENT : RESOLVE_HW; + enum radv_resolve_method resolve_method = pdevice->rad_info.gfx_level >= GFX11 ? 
RESOLVE_FRAGMENT : RESOLVE_HW; /* we can use the hw resolve only for single full resolves */ if (pResolveImageInfo->regionCount == 1) { - if (pResolveImageInfo->pRegions[0].srcOffset.x || - pResolveImageInfo->pRegions[0].srcOffset.y || pResolveImageInfo->pRegions[0].srcOffset.z) + if (pResolveImageInfo->pRegions[0].srcOffset.x || pResolveImageInfo->pRegions[0].srcOffset.y || + pResolveImageInfo->pRegions[0].srcOffset.z) resolve_method = RESOLVE_COMPUTE; - if (pResolveImageInfo->pRegions[0].dstOffset.x || - pResolveImageInfo->pRegions[0].dstOffset.y || pResolveImageInfo->pRegions[0].dstOffset.z) + if (pResolveImageInfo->pRegions[0].dstOffset.x || pResolveImageInfo->pRegions[0].dstOffset.y || + pResolveImageInfo->pRegions[0].dstOffset.z) resolve_method = RESOLVE_COMPUTE; if (pResolveImageInfo->pRegions[0].extent.width != src_image->vk.extent.width || @@ -560,11 +539,9 @@ radv_CmdResolveImage2(VkCommandBuffer commandBuffer, const VkImageResolve2 *region = &pResolveImageInfo->pRegions[r]; radv_pick_resolve_method_images(cmd_buffer->device, src_image, src_image->vk.format, dst_image, - region->dstSubresource.mipLevel, dst_image_layout, - cmd_buffer, &resolve_method); + region->dstSubresource.mipLevel, dst_image_layout, cmd_buffer, &resolve_method); - resolve_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout, region, - resolve_method); + resolve_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout, region, resolve_method); } } @@ -573,8 +550,7 @@ radv_cmd_buffer_resolve_rendering_hw(struct radv_cmd_buffer *cmd_buffer) { struct radv_meta_saved_state saved_state; - radv_meta_save(&saved_state, cmd_buffer, - RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_RENDER); + radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_RENDER); VkRect2D *resolve_area = &saved_state.render.area; @@ -600,11 +576,10 @@ radv_cmd_buffer_resolve_rendering_hw(struct radv_cmd_buffer *cmd_buffer) VkImageLayout dst_layout = saved_state.render.color_att[i].resolve_layout; struct radv_image *dst_img = dst_iview->image; - uint32_t queue_mask = radv_image_queue_family_mask(dst_img, cmd_buffer->qf, - cmd_buffer->qf); + uint32_t queue_mask = radv_image_queue_family_mask(dst_img, cmd_buffer->qf, cmd_buffer->qf); - if (radv_layout_dcc_compressed(cmd_buffer->device, dst_img, dst_iview->vk.base_mip_level, - dst_layout, queue_mask)) { + if (radv_layout_dcc_compressed(cmd_buffer->device, dst_img, dst_iview->vk.base_mip_level, dst_layout, + queue_mask)) { VkImageSubresourceRange range = { .aspectMask = dst_iview->vk.aspects, .baseMipLevel = dst_iview->vk.base_mip_level, @@ -644,8 +619,8 @@ radv_cmd_buffer_resolve_rendering_hw(struct radv_cmd_buffer *cmd_buffer) radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info); - VkResult ret = build_resolve_pipeline( - cmd_buffer->device, radv_format_meta_fs_key(cmd_buffer->device, dst_iview->vk.format)); + VkResult ret = + build_resolve_pipeline(cmd_buffer->device, radv_format_meta_fs_key(cmd_buffer->device, dst_iview->vk.format)); if (ret != VK_SUCCESS) { vk_command_buffer_set_error(&cmd_buffer->vk, ret); continue; @@ -667,8 +642,7 @@ radv_cmd_buffer_resolve_rendering(struct radv_cmd_buffer *cmd_buffer) { const struct radv_physical_device *pdevice = cmd_buffer->device->physical_device; const struct radv_rendering_state *render = &cmd_buffer->state.render; - enum radv_resolve_method resolve_method = - pdevice->rad_info.gfx_level >= GFX11 ? 
RESOLVE_FRAGMENT : RESOLVE_HW; + enum radv_resolve_method resolve_method = pdevice->rad_info.gfx_level >= GFX11 ? RESOLVE_FRAGMENT : RESOLVE_HW; bool has_color_resolve = false; for (uint32_t i = 0; i < render->color_att_count; ++i) { @@ -687,19 +661,15 @@ radv_cmd_buffer_resolve_rendering(struct radv_cmd_buffer *cmd_buffer) struct radv_image_view *dst_iview = render->ds_att.resolve_iview; VkImageLayout dst_layout = render->ds_att.resolve_layout; - radv_pick_resolve_method_images(cmd_buffer->device, src_iview->image, src_iview->vk.format, - dst_iview->image, dst_iview->vk.base_mip_level, dst_layout, - cmd_buffer, &resolve_method); + radv_pick_resolve_method_images(cmd_buffer->device, src_iview->image, src_iview->vk.format, dst_iview->image, + dst_iview->vk.base_mip_level, dst_layout, cmd_buffer, &resolve_method); - if ((src_iview->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && - render->ds_att.resolve_mode != VK_RESOLVE_MODE_NONE) { + if ((src_iview->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && render->ds_att.resolve_mode != VK_RESOLVE_MODE_NONE) { if (resolve_method == RESOLVE_FRAGMENT) { - radv_depth_stencil_resolve_rendering_fs(cmd_buffer, VK_IMAGE_ASPECT_DEPTH_BIT, - render->ds_att.resolve_mode); + radv_depth_stencil_resolve_rendering_fs(cmd_buffer, VK_IMAGE_ASPECT_DEPTH_BIT, render->ds_att.resolve_mode); } else { assert(resolve_method == RESOLVE_COMPUTE); - radv_depth_stencil_resolve_rendering_cs(cmd_buffer, VK_IMAGE_ASPECT_DEPTH_BIT, - render->ds_att.resolve_mode); + radv_depth_stencil_resolve_rendering_cs(cmd_buffer, VK_IMAGE_ASPECT_DEPTH_BIT, render->ds_att.resolve_mode); } } @@ -765,15 +735,15 @@ radv_cmd_buffer_resolve_rendering(struct radv_cmd_buffer *cmd_buffer) struct radv_image *dst_img = dst_iview->image; radv_pick_resolve_method_images(cmd_buffer->device, src_img, src_iview->vk.format, dst_img, - dst_iview->vk.base_mip_level, dst_layout, - cmd_buffer, &resolve_method); + dst_iview->vk.base_mip_level, dst_layout, cmd_buffer, &resolve_method); VkImageResolve2 region = { .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2, - .extent = { - .width = resolve_area.extent.width, - .height = resolve_area.extent.height, - .depth = 1, - }, + .extent = + { + .width = resolve_area.extent.width, + .height = resolve_area.extent.height, + .depth = 1, + }, .srcSubresource = (VkImageSubresourceLayers){ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, @@ -788,8 +758,8 @@ radv_cmd_buffer_resolve_rendering(struct radv_cmd_buffer *cmd_buffer) .baseArrayLayer = dst_iview->vk.base_array_layer, .layerCount = layer_count, }, - .srcOffset = { resolve_area.offset.x, resolve_area.offset.y, 0 }, - .dstOffset = { resolve_area.offset.x, resolve_area.offset.y, 0 }, + .srcOffset = {resolve_area.offset.x, resolve_area.offset.y, 0}, + .dstOffset = {resolve_area.offset.x, resolve_area.offset.y, 0}, }; switch (resolve_method) { @@ -799,14 +769,12 @@ radv_cmd_buffer_resolve_rendering(struct radv_cmd_buffer *cmd_buffer) case RESOLVE_COMPUTE: radv_decompress_resolve_src(cmd_buffer, src_iview->image, src_layout, ®ion); - radv_cmd_buffer_resolve_rendering_cs(cmd_buffer, src_iview, src_layout, dst_iview, - dst_layout, ®ion); + radv_cmd_buffer_resolve_rendering_cs(cmd_buffer, src_iview, src_layout, dst_iview, dst_layout, ®ion); break; case RESOLVE_FRAGMENT: radv_decompress_resolve_src(cmd_buffer, src_iview->image, src_layout, ®ion); - radv_cmd_buffer_resolve_rendering_fs(cmd_buffer, src_iview, src_layout, dst_iview, - dst_layout); + radv_cmd_buffer_resolve_rendering_fs(cmd_buffer, src_iview, src_layout, dst_iview, dst_layout); break; 
default: unreachable("Invalid resolve method"); @@ -856,8 +824,7 @@ void radv_decompress_resolve_src(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkImageLayout src_image_layout, const VkImageResolve2 *region) { - const uint32_t src_base_layer = - radv_meta_get_iview_layer(src_image, ®ion->srcSubresource, ®ion->srcOffset); + const uint32_t src_base_layer = radv_meta_get_iview_layer(src_image, ®ion->srcSubresource, ®ion->srcOffset); VkImageMemoryBarrier2 barrier = { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, @@ -868,13 +835,14 @@ radv_decompress_resolve_src(struct radv_cmd_buffer *cmd_buffer, struct radv_imag .oldLayout = src_image_layout, .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, .image = radv_image_to_handle(src_image), - .subresourceRange = (VkImageSubresourceRange){ - .aspectMask = region->srcSubresource.aspectMask, - .baseMipLevel = region->srcSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = src_base_layer, - .layerCount = region->srcSubresource.layerCount, - }, + .subresourceRange = + (VkImageSubresourceRange){ + .aspectMask = region->srcSubresource.aspectMask, + .baseMipLevel = region->srcSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = src_base_layer, + .layerCount = region->srcSubresource.layerCount, + }, }; VkSampleLocationsInfoEXT sample_loc_info; @@ -882,8 +850,7 @@ radv_decompress_resolve_src(struct radv_cmd_buffer *cmd_buffer, struct radv_imag /* If the depth/stencil image uses different sample * locations, we need them during HTILE decompressions. */ - struct radv_sample_locations_state *sample_locs = - &cmd_buffer->state.render.sample_locations; + struct radv_sample_locations_state *sample_locs = &cmd_buffer->state.render.sample_locations; sample_loc_info = (VkSampleLocationsInfoEXT){ .sType = VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT, diff --git a/src/amd/vulkan/meta/radv_meta_resolve_cs.c b/src/amd/vulkan/meta/radv_meta_resolve_cs.c index 7b21ef4..c7315ec 100644 --- a/src/amd/vulkan/meta/radv_meta_resolve_cs.c +++ b/src/amd/vulkan/meta/radv_meta_resolve_cs.c @@ -47,8 +47,7 @@ static nir_shader * build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples) { enum glsl_base_type img_base_type = is_integer ? GLSL_TYPE_UINT : GLSL_TYPE_FLOAT; - const struct glsl_type *sampler_type = - glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type); + const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type); const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, img_base_type); nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs-%d-%s", samples, is_integer ? "int" : (is_srgb ? 
"srgb" : "float")); @@ -79,14 +78,11 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s if (is_srgb) outval = radv_meta_build_resolve_srgb_conversion(&b, outval); - nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), - nir_channel(&b, dst_coord, 1), - nir_ssa_undef(&b, 1, 32), - nir_ssa_undef(&b, 1, 32)); + nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), nir_channel(&b, dst_coord, 1), + nir_ssa_undef(&b, 1, 32), nir_ssa_undef(&b, 1, 32)); - nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, - nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0), - .image_dim = GLSL_SAMPLER_DIM_2D); + nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32), + outval, nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D); return b.shader; } @@ -117,13 +113,12 @@ build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples, VkResolveModeFlagBits resolve_mode) { enum glsl_base_type img_base_type = index == DEPTH_RESOLVE ? GLSL_TYPE_FLOAT : GLSL_TYPE_UINT; - const struct glsl_type *sampler_type = - glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, img_base_type); + const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, img_base_type); const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, img_base_type); - nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs_%s-%s-%d", - index == DEPTH_RESOLVE ? "depth" : "stencil", - get_resolve_mode_str(resolve_mode), samples); + nir_builder b = + radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs_%s-%s-%d", + index == DEPTH_RESOLVE ? "depth" : "stencil", get_resolve_mode_str(resolve_mode), samples); b.shader->info.workgroup_size[0] = 8; b.shader->info.workgroup_size[1] = 8; @@ -139,13 +134,10 @@ build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples, nir_ssa_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8); - nir_ssa_def *resolve_coord = nir_iadd(&b, - nir_trim_vector(&b, global_id, 2), - offset); + nir_ssa_def *resolve_coord = nir_iadd(&b, nir_trim_vector(&b, global_id, 2), offset); - nir_ssa_def *img_coord = nir_vec3(&b, nir_channel(&b, resolve_coord, 0), - nir_channel(&b, resolve_coord, 1), - nir_channel(&b, global_id, 2)); + nir_ssa_def *img_coord = + nir_vec3(&b, nir_channel(&b, resolve_coord, 0), nir_channel(&b, resolve_coord, 1), nir_channel(&b, global_id, 2)); nir_deref_instr *input_img_deref = nir_build_deref_var(&b, input_img); nir_ssa_def *outval = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, 0)); @@ -182,9 +174,8 @@ build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples, nir_ssa_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0), nir_channel(&b, img_coord, 1), nir_channel(&b, img_coord, 2), nir_ssa_undef(&b, 1, 32)); - nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, - nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0), - .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true); + nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_ssa_undef(&b, 1, 32), outval, + nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true); return b.shader; } @@ -196,25 +187,23 @@ create_layout(struct radv_device *device) * two descriptors one for the image being sampled * one for the buffer being written. 
*/ - VkDescriptorSetLayoutCreateInfo ds_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 2, - .pBindings = (VkDescriptorSetLayoutBinding[]){ - {.binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - {.binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - }}; - - result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, - &device->meta_state.alloc, + VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 2, + .pBindings = (VkDescriptorSetLayoutBinding[]){ + {.binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + {.binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + }}; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc, &device->meta_state.resolve_compute.ds_layout); if (result != VK_SUCCESS) goto fail; @@ -227,8 +216,7 @@ create_layout(struct radv_device *device) .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16}, }; - result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, - &device->meta_state.alloc, + result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, &device->meta_state.resolve_compute.p_layout); if (result != VK_SUCCESS) goto fail; @@ -238,8 +226,7 @@ fail: } static VkResult -create_resolve_pipeline(struct radv_device *device, int samples, bool is_integer, bool is_srgb, - VkPipeline *pipeline) +create_resolve_pipeline(struct radv_device *device, int samples, bool is_integer, bool is_srgb, VkPipeline *pipeline) { VkResult result; @@ -268,8 +255,8 @@ create_resolve_pipeline(struct radv_device *device, int samples, bool is_integer .layout = device->meta_state.resolve_compute.p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &vk_pipeline_info, NULL, pipeline); + result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, + NULL, pipeline); if (result != VK_SUCCESS) goto fail; @@ -294,8 +281,7 @@ create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples, i return VK_SUCCESS; } - nir_shader *cs = - build_depth_stencil_resolve_compute_shader(device, samples, index, resolve_mode); + nir_shader *cs = build_depth_stencil_resolve_compute_shader(device, samples, index, resolve_mode); /* compute shader */ VkPipelineShaderStageCreateInfo pipeline_shader_stage = { @@ -313,8 +299,8 @@ create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples, i .layout = device->meta_state.resolve_compute.p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &vk_pipeline_info, NULL, pipeline); + result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, 
&vk_pipeline_info, + NULL, pipeline); if (result != VK_SUCCESS) goto fail; @@ -343,60 +329,50 @@ radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_ for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) { uint32_t samples = 1 << i; - res = create_resolve_pipeline(device, samples, false, false, - &state->resolve_compute.rc[i].pipeline); + res = create_resolve_pipeline(device, samples, false, false, &state->resolve_compute.rc[i].pipeline); if (res != VK_SUCCESS) return res; - res = create_resolve_pipeline(device, samples, true, false, - &state->resolve_compute.rc[i].i_pipeline); + res = create_resolve_pipeline(device, samples, true, false, &state->resolve_compute.rc[i].i_pipeline); if (res != VK_SUCCESS) return res; - res = create_resolve_pipeline(device, samples, false, true, - &state->resolve_compute.rc[i].srgb_pipeline); + res = create_resolve_pipeline(device, samples, false, true, &state->resolve_compute.rc[i].srgb_pipeline); if (res != VK_SUCCESS) return res; - res = create_depth_stencil_resolve_pipeline( - device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_AVERAGE_BIT, - &state->resolve_compute.depth[i].average_pipeline); + res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_AVERAGE_BIT, + &state->resolve_compute.depth[i].average_pipeline); if (res != VK_SUCCESS) return res; - res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, - VK_RESOLVE_MODE_MAX_BIT, + res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_MAX_BIT, &state->resolve_compute.depth[i].max_pipeline); if (res != VK_SUCCESS) return res; - res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, - VK_RESOLVE_MODE_MIN_BIT, + res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_MIN_BIT, &state->resolve_compute.depth[i].min_pipeline); if (res != VK_SUCCESS) return res; - res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE, - VK_RESOLVE_MODE_MAX_BIT, + res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE, VK_RESOLVE_MODE_MAX_BIT, &state->resolve_compute.stencil[i].max_pipeline); if (res != VK_SUCCESS) return res; - res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE, - VK_RESOLVE_MODE_MIN_BIT, + res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE, VK_RESOLVE_MODE_MIN_BIT, &state->resolve_compute.stencil[i].min_pipeline); if (res != VK_SUCCESS) return res; } - res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE, - VK_RESOLVE_MODE_SAMPLE_ZERO_BIT, + res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT, &state->resolve_compute.depth_zero_pipeline); if (res != VK_SUCCESS) return res; - return create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE, - VK_RESOLVE_MODE_SAMPLE_ZERO_BIT, + return create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT, &state->resolve_compute.stencil_zero_pipeline); } @@ -405,41 +381,33 @@ radv_device_finish_meta_resolve_compute_state(struct radv_device *device) { struct radv_meta_state *state = &device->meta_state; for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) { - radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].pipeline, - &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].pipeline, &state->alloc); - 
radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].i_pipeline, - &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].i_pipeline, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->resolve_compute.rc[i].srgb_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].srgb_pipeline, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->resolve_compute.depth[i].average_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth[i].average_pipeline, + &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->resolve_compute.depth[i].max_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth[i].max_pipeline, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->resolve_compute.depth[i].min_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth[i].min_pipeline, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->resolve_compute.stencil[i].max_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil[i].max_pipeline, + &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->resolve_compute.stencil[i].min_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil[i].min_pipeline, + &state->alloc); } - radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth_zero_pipeline, - &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth_zero_pipeline, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil_zero_pipeline, - &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil_zero_pipeline, &state->alloc); - device->vk.dispatch_table.DestroyDescriptorSetLayout( - radv_device_to_handle(device), state->resolve_compute.ds_layout, &state->alloc); - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_compute.p_layout, - &state->alloc); + device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->resolve_compute.ds_layout, + &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_compute.p_layout, &state->alloc); } static VkPipeline * @@ -473,45 +441,42 @@ radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image_ } static void -emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview, - struct radv_image_view *dst_iview, const VkOffset2D *src_offset, - const VkOffset2D *dst_offset, const VkExtent2D *resolve_extent) +emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview, struct radv_image_view *dst_iview, + const VkOffset2D *src_offset, const VkOffset2D *dst_offset, const VkExtent2D *resolve_extent) { struct radv_device *device = cmd_buffer->device; VkPipeline *pipeline; - radv_meta_push_descriptor_set( - cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout, - 0, /* set */ - 2, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - 
.descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = - (VkDescriptorImageInfo[]){ - {.sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(src_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL}, - }}, - {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]){ - { - .sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(dst_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - }}}); + radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + device->meta_state.resolve_compute.p_layout, 0, /* set */ + 2, /* descriptorWriteCount */ + (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = + (VkDescriptorImageInfo[]){ + {.sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL}, + }}, + {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]){ + { + .sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(dst_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + }}}); pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview); - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, - *pipeline); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); unsigned push_constants[4] = { src_offset->x, @@ -519,9 +484,8 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_ivi dst_offset->x, dst_offset->y, }; - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.resolve_compute.p_layout, VK_SHADER_STAGE_COMPUTE_BIT, - 0, 16, push_constants); + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.resolve_compute.p_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants); radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1); } @@ -536,33 +500,32 @@ emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image const uint32_t samples_log2 = ffs(samples) - 1; VkPipeline *pipeline; - radv_meta_push_descriptor_set( - cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout, - 0, /* set */ - 2, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = - (VkDescriptorImageInfo[]){ - {.sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(src_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL}, - }}, - {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]){ - { - .sampler = VK_NULL_HANDLE, - .imageView = radv_image_view_to_handle(dst_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - }}}); + radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + 
device->meta_state.resolve_compute.p_layout, 0, /* set */ + 2, /* descriptorWriteCount */ + (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = + (VkDescriptorImageInfo[]){ + {.sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL}, + }}, + {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]){ + { + .sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(dst_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + }}}); switch (resolve_mode) { case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: @@ -602,23 +565,19 @@ emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image } } - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, - *pipeline); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); - uint32_t push_constants[2] = { resolve_offset->x, resolve_offset->y }; + uint32_t push_constants[2] = {resolve_offset->x, resolve_offset->y}; - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.resolve_compute.p_layout, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(push_constants), push_constants); + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.resolve_compute.p_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), push_constants); - radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, - resolve_extent->depth); + radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, resolve_extent->depth); } void -radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, - VkFormat src_format, VkImageLayout src_image_layout, - struct radv_image *dst_image, VkFormat dst_format, +radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkFormat src_format, + VkImageLayout src_image_layout, struct radv_image *dst_image, VkFormat dst_format, VkImageLayout dst_image_layout, const VkImageResolve2 *region) { struct radv_meta_saved_state saved_state; @@ -626,15 +585,13 @@ radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_ /* For partial resolves, DCC should be decompressed before resolving * because the metadata is re-initialized to the uncompressed after. 
*/ - uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, - cmd_buffer->qf); + uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf); if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dst_image) && - radv_layout_dcc_compressed(cmd_buffer->device, dst_image, region->dstSubresource.mipLevel, - dst_image_layout, queue_mask) && + radv_layout_dcc_compressed(cmd_buffer->device, dst_image, region->dstSubresource.mipLevel, dst_image_layout, + queue_mask) && (region->dstOffset.x || region->dstOffset.y || region->dstOffset.z || - region->extent.width != dst_image->vk.extent.width || - region->extent.height != dst_image->vk.extent.height || + region->extent.width != dst_image->vk.extent.width || region->extent.height != dst_image->vk.extent.height || region->extent.depth != dst_image->vk.extent.depth)) { radv_decompress_dcc(cmd_buffer, dst_image, &(VkImageSubresourceRange){ @@ -646,19 +603,16 @@ radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_ }); } - radv_meta_save( - &saved_state, cmd_buffer, - RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS); + radv_meta_save(&saved_state, cmd_buffer, + RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS); assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount); - const uint32_t src_base_layer = - radv_meta_get_iview_layer(src_image, ®ion->srcSubresource, ®ion->srcOffset); + const uint32_t src_base_layer = radv_meta_get_iview_layer(src_image, ®ion->srcSubresource, ®ion->srcOffset); - const uint32_t dst_base_layer = - radv_meta_get_iview_layer(dst_image, ®ion->dstSubresource, ®ion->dstOffset); + const uint32_t dst_base_layer = radv_meta_get_iview_layer(dst_image, ®ion->dstSubresource, ®ion->dstOffset); const struct VkExtent3D extent = vk_image_sanitize_extent(&src_image->vk, region->extent); const struct VkOffset3D srcOffset = vk_image_sanitize_offset(&src_image->vk, region->srcOffset); @@ -703,8 +657,7 @@ radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_ 0, NULL); emit_resolve(cmd_buffer, &src_iview, &dst_iview, &(VkOffset2D){srcOffset.x, srcOffset.y}, - &(VkOffset2D){dstOffset.x, dstOffset.y}, - &(VkExtent2D){extent.width, extent.height}); + &(VkOffset2D){dstOffset.x, dstOffset.y}, &(VkExtent2D){extent.width, extent.height}); radv_image_view_finish(&src_iview); radv_image_view_finish(&dst_iview); @@ -713,8 +666,8 @@ radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_ radv_meta_restore(&saved_state, cmd_buffer); if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dst_image) && - radv_layout_dcc_compressed(cmd_buffer->device, dst_image, region->dstSubresource.mipLevel, - dst_image_layout, queue_mask)) { + radv_layout_dcc_compressed(cmd_buffer->device, dst_image, region->dstSubresource.mipLevel, dst_image_layout, + queue_mask)) { cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE; @@ -731,24 +684,19 @@ radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_ } void -radv_cmd_buffer_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, - struct radv_image_view *src_iview, VkImageLayout src_layout, - struct radv_image_view *dst_iview, VkImageLayout dst_layout, - const VkImageResolve2 *region) 
+radv_cmd_buffer_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview, + VkImageLayout src_layout, struct radv_image_view *dst_iview, + VkImageLayout dst_layout, const VkImageResolve2 *region) { - radv_meta_resolve_compute_image(cmd_buffer, - src_iview->image, src_iview->vk.format, src_layout, - dst_iview->image, dst_iview->vk.format, dst_layout, - region); + radv_meta_resolve_compute_image(cmd_buffer, src_iview->image, src_iview->vk.format, src_layout, dst_iview->image, + dst_iview->vk.format, dst_layout, region); - cmd_buffer->state.flush_bits |= - RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | - radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL); + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | + radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL); } void -radv_depth_stencil_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, - VkImageAspectFlags aspects, +radv_depth_stencil_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlags aspects, VkResolveModeFlagBits resolve_mode) { const struct radv_rendering_state *render = &cmd_buffer->state.render; @@ -780,8 +728,7 @@ radv_depth_stencil_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, radv_decompress_resolve_src(cmd_buffer, src_image, src_layout, ®ion); - radv_meta_save(&saved_state, cmd_buffer, - RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS); + radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS); struct radv_image_view *dst_iview = render->ds_att.resolve_iview; VkImageLayout dst_layout = render->ds_att.resolve_layout; @@ -827,12 +774,10 @@ radv_depth_stencil_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, &(VkExtent3D){resolve_area.extent.width, resolve_area.extent.height, layer_count}, aspects, resolve_mode); - cmd_buffer->state.flush_bits |= - RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | - radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL); + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | + radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL); - uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, - cmd_buffer->qf); + uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf); if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, dst_layout, queue_mask)) { VkImageSubresourceRange range = {0}; diff --git a/src/amd/vulkan/meta/radv_meta_resolve_fs.c b/src/amd/vulkan/meta/radv_meta_resolve_fs.c index 58392e1..6ffc284 100644 --- a/src/amd/vulkan/meta/radv_meta_resolve_fs.c +++ b/src/amd/vulkan/meta/radv_meta_resolve_fs.c @@ -35,11 +35,10 @@ build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, int samp { enum glsl_base_type img_base_type = is_integer ? GLSL_TYPE_UINT : GLSL_TYPE_FLOAT; const struct glsl_type *vec4 = glsl_vec4_type(); - const struct glsl_type *sampler_type = - glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type); + const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type); - nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_resolve_fs-%d-%s", - samples, is_integer ? "int" : "float"); + nir_builder b = + radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_resolve_fs-%d-%s", samples, is_integer ? 
"int" : "float"); nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex"); input_img->data.descriptor_set = 0; @@ -53,9 +52,7 @@ build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, int samp nir_ssa_def *pos_int = nir_f2i32(&b, pos_in); - nir_ssa_def *img_coord = nir_trim_vector(&b, - nir_iadd(&b, pos_int, src_offset), - 2); + nir_ssa_def *img_coord = nir_trim_vector(&b, nir_iadd(&b, pos_int, src_offset), 2); nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color"); radv_meta_build_resolve_shader_core(dev, &b, is_integer, samples, input_img, color, img_coord); @@ -72,20 +69,18 @@ create_layout(struct radv_device *device) /* * one descriptors for the image being sampled */ - VkDescriptorSetLayoutCreateInfo ds_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = 1, - .pBindings = (VkDescriptorSetLayoutBinding[]){ - {.binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .pImmutableSamplers = NULL}, - }}; - - result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, - &device->meta_state.alloc, + VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]){ + {.binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL}, + }}; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc, &device->meta_state.resolve_fragment.ds_layout); if (result != VK_SUCCESS) goto fail; @@ -98,8 +93,7 @@ create_layout(struct radv_device *device) .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 8}, }; - result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, - &device->meta_state.alloc, + result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, &device->meta_state.resolve_fragment.p_layout); if (result != VK_SUCCESS) goto fail; @@ -175,16 +169,15 @@ create_resolve_pipeline(struct radv_device *device, int samples_log2, VkFormat f .scissorCount = 1, }, .pRasterizationState = - &(VkPipelineRasterizationStateCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .rasterizerDiscardEnable = false, - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, - .depthBiasConstantFactor = 0.0f, - .depthBiasClamp = 0.0f, - .depthBiasSlopeFactor = 0.0f, - .lineWidth = 1.0f}, + &(VkPipelineRasterizationStateCreateInfo){.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f}, .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo){ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, @@ -198,10 +191,10 @@ create_resolve_pipeline(struct radv_device *device, int samples_log2, 
VkFormat f .attachmentCount = 1, .pAttachments = (VkPipelineColorBlendAttachmentState[]){ - {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT | - VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT}, + {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT}, }, - .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f }}, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}}, .pDynamicState = &(VkPipelineDynamicStateCreateInfo){ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, @@ -220,9 +213,8 @@ create_resolve_pipeline(struct radv_device *device, int samples_log2, VkFormat f const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true}; - result = radv_graphics_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &vk_pipeline_info, &radv_pipeline_info, - &device->meta_state.alloc, pipeline); + result = radv_graphics_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, + &radv_pipeline_info, &device->meta_state.alloc, pipeline); ralloc_free(vs); ralloc_free(fs); @@ -255,12 +247,11 @@ build_depth_stencil_resolve_fragment_shader(struct radv_device *dev, int samples { enum glsl_base_type img_base_type = index == DEPTH_RESOLVE ? GLSL_TYPE_FLOAT : GLSL_TYPE_UINT; const struct glsl_type *vec4 = glsl_vec4_type(); - const struct glsl_type *sampler_type = - glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type); + const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type); - nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_resolve_fs_%s-%s-%d", - index == DEPTH_RESOLVE ? "depth" : "stencil", - get_resolve_mode_str(resolve_mode), samples); + nir_builder b = + radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_resolve_fs_%s-%s-%d", + index == DEPTH_RESOLVE ? 
"depth" : "stencil", get_resolve_mode_str(resolve_mode), samples); nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex"); input_img->data.descriptor_set = 0; @@ -355,8 +346,7 @@ create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples_lo } uint32_t samples = 1 << samples_log2; - nir_shader *fs = - build_depth_stencil_resolve_fragment_shader(device, samples, index, resolve_mode); + nir_shader *fs = build_depth_stencil_resolve_fragment_shader(device, samples, index, resolve_mode); nir_shader *vs = radv_meta_build_nir_vs_generate_vertices(device); VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { @@ -390,15 +380,16 @@ create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples_lo .writeMask = UINT32_MAX, .reference = 0u, }, - .back = { - .failOp = stencil_op, - .passOp = stencil_op, - .depthFailOp = stencil_op, - .compareOp = VK_COMPARE_OP_ALWAYS, - .compareMask = UINT32_MAX, - .writeMask = UINT32_MAX, - .reference = 0u, - }, + .back = + { + .failOp = stencil_op, + .passOp = stencil_op, + .depthFailOp = stencil_op, + .compareOp = VK_COMPARE_OP_ALWAYS, + .compareMask = UINT32_MAX, + .writeMask = UINT32_MAX, + .reference = 0u, + }, .minDepthBounds = 0.0f, .maxDepthBounds = 1.0f}; @@ -431,16 +422,15 @@ create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples_lo }, .pDepthStencilState = &depth_stencil_state, .pRasterizationState = - &(VkPipelineRasterizationStateCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .rasterizerDiscardEnable = false, - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, - .depthBiasConstantFactor = 0.0f, - .depthBiasClamp = 0.0f, - .depthBiasSlopeFactor = 0.0f, - .lineWidth = 1.0f}, + &(VkPipelineRasterizationStateCreateInfo){.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f}, .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo){ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, @@ -454,10 +444,10 @@ create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples_lo .attachmentCount = 0, .pAttachments = (VkPipelineColorBlendAttachmentState[]){ - {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT | - VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT}, + {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT}, }, - .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f }}, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}}, .pDynamicState = &(VkPipelineDynamicStateCreateInfo){ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, @@ -476,9 +466,8 @@ create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples_lo const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true}; - result = radv_graphics_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &vk_pipeline_info, &radv_pipeline_info, - &device->meta_state.alloc, pipeline); + result = radv_graphics_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, + &radv_pipeline_info, 
&device->meta_state.alloc, pipeline); ralloc_free(vs); ralloc_free(fs); @@ -506,39 +495,32 @@ radv_device_init_meta_resolve_fragment_state(struct radv_device *device, bool on return res; } - res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE, - VK_RESOLVE_MODE_AVERAGE_BIT); + res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE, VK_RESOLVE_MODE_AVERAGE_BIT); if (res != VK_SUCCESS) return res; - res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE, - VK_RESOLVE_MODE_MIN_BIT); + res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE, VK_RESOLVE_MODE_MIN_BIT); if (res != VK_SUCCESS) return res; - res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE, - VK_RESOLVE_MODE_MAX_BIT); + res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE, VK_RESOLVE_MODE_MAX_BIT); if (res != VK_SUCCESS) return res; - res = create_depth_stencil_resolve_pipeline(device, i, STENCIL_RESOLVE, - VK_RESOLVE_MODE_MIN_BIT); + res = create_depth_stencil_resolve_pipeline(device, i, STENCIL_RESOLVE, VK_RESOLVE_MODE_MIN_BIT); if (res != VK_SUCCESS) return res; - res = create_depth_stencil_resolve_pipeline(device, i, STENCIL_RESOLVE, - VK_RESOLVE_MODE_MAX_BIT); + res = create_depth_stencil_resolve_pipeline(device, i, STENCIL_RESOLVE, VK_RESOLVE_MODE_MAX_BIT); if (res != VK_SUCCESS) return res; } - res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE, - VK_RESOLVE_MODE_SAMPLE_ZERO_BIT); + res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT); if (res != VK_SUCCESS) return res; - return create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE, - VK_RESOLVE_MODE_SAMPLE_ZERO_BIT); + return create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT); } void @@ -547,35 +529,29 @@ radv_device_finish_meta_resolve_fragment_state(struct radv_device *device) struct radv_meta_state *state = &device->meta_state; for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) { for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) { - radv_DestroyPipeline(radv_device_to_handle(device), - state->resolve_fragment.rc[i].pipeline[j], &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_fragment.rc[i].pipeline[j], &state->alloc); } - radv_DestroyPipeline(radv_device_to_handle(device), - state->resolve_fragment.depth[i].average_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_fragment.depth[i].average_pipeline, + &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->resolve_fragment.depth[i].max_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_fragment.depth[i].max_pipeline, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->resolve_fragment.depth[i].min_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_fragment.depth[i].min_pipeline, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->resolve_fragment.stencil[i].max_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_fragment.stencil[i].max_pipeline, + &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->resolve_fragment.stencil[i].min_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_fragment.stencil[i].min_pipeline, + &state->alloc); } - 
radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_fragment.depth_zero_pipeline, - &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->resolve_fragment.stencil_zero_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_fragment.depth_zero_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_fragment.stencil_zero_pipeline, &state->alloc); - device->vk.dispatch_table.DestroyDescriptorSetLayout( - radv_device_to_handle(device), state->resolve_fragment.ds_layout, &state->alloc); - radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_fragment.p_layout, - &state->alloc); + device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), + state->resolve_fragment.ds_layout, &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_fragment.p_layout, &state->alloc); } static VkPipeline * @@ -603,18 +579,16 @@ radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image_ } static void -emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview, - struct radv_image_view *dst_iview, const VkOffset2D *src_offset, - const VkOffset2D *dst_offset) +emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview, struct radv_image_view *dst_iview, + const VkOffset2D *src_offset, const VkOffset2D *dst_offset) { struct radv_device *device = cmd_buffer->device; VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); VkPipeline *pipeline; radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - cmd_buffer->device->meta_state.resolve_fragment.p_layout, - 0, /* set */ - 1, /* descriptorWriteCount */ + cmd_buffer->device->meta_state.resolve_fragment.p_layout, 0, /* set */ + 1, /* descriptorWriteCount */ (VkWriteDescriptorSet[]){ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .dstBinding = 0, @@ -639,9 +613,8 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_ivi src_offset->x - dst_offset->x, src_offset->y - dst_offset->y, }; - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - device->meta_state.resolve_fragment.p_layout, VK_SHADER_STAGE_FRAGMENT_BIT, - 0, 8, push_constants); + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.resolve_fragment.p_layout, + VK_SHADER_STAGE_FRAGMENT_BIT, 0, 8, push_constants); pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview, dst_iview); @@ -664,9 +637,8 @@ emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image VkPipeline *pipeline; radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - cmd_buffer->device->meta_state.resolve_fragment.p_layout, - 0, /* set */ - 1, /* descriptorWriteCount */ + cmd_buffer->device->meta_state.resolve_fragment.p_layout, 0, /* set */ + 1, /* descriptorWriteCount */ (VkWriteDescriptorSet[]){ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .dstBinding = 0, @@ -721,8 +693,7 @@ emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image } } - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, - *pipeline); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport){.x = resolve_offset->x, @@ -750,27 +721,24 @@ radv_meta_resolve_fragment_image(struct 
radv_cmd_buffer *cmd_buffer, struct radv unsigned dst_layout = radv_meta_dst_layout_from_layout(dst_image_layout); VkImageLayout layout = radv_meta_dst_layout_to_layout(dst_layout); - radv_meta_save( - &saved_state, cmd_buffer, - RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS); + radv_meta_save(&saved_state, cmd_buffer, + RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS); assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount); - const uint32_t src_base_layer = - radv_meta_get_iview_layer(src_image, ®ion->srcSubresource, ®ion->srcOffset); + const uint32_t src_base_layer = radv_meta_get_iview_layer(src_image, ®ion->srcSubresource, ®ion->srcOffset); - const uint32_t dst_base_layer = - radv_meta_get_iview_layer(dst_image, ®ion->dstSubresource, ®ion->dstOffset); + const uint32_t dst_base_layer = radv_meta_get_iview_layer(dst_image, ®ion->dstSubresource, ®ion->dstOffset); const struct VkExtent3D extent = vk_image_sanitize_extent(&src_image->vk, region->extent); const struct VkOffset3D srcOffset = vk_image_sanitize_offset(&src_image->vk, region->srcOffset); const struct VkOffset3D dstOffset = vk_image_sanitize_offset(&dst_image->vk, region->dstOffset); VkRect2D resolve_area = { - .offset = { dstOffset.x, dstOffset.y }, - .extent = { extent.width, extent.height }, + .offset = {dstOffset.x, dstOffset.y}, + .extent = {extent.width, extent.height}, }; radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, @@ -852,9 +820,9 @@ radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer, struct radv } void -radv_cmd_buffer_resolve_rendering_fs(struct radv_cmd_buffer *cmd_buffer, - struct radv_image_view *src_iview, VkImageLayout src_layout, - struct radv_image_view *dst_iview, VkImageLayout dst_layout) +radv_cmd_buffer_resolve_rendering_fs(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview, + VkImageLayout src_layout, struct radv_image_view *dst_iview, + VkImageLayout dst_layout) { const struct radv_rendering_state *render = &cmd_buffer->state.render; struct radv_meta_saved_state saved_state; @@ -862,8 +830,7 @@ radv_cmd_buffer_resolve_rendering_fs(struct radv_cmd_buffer *cmd_buffer, radv_meta_save( &saved_state, cmd_buffer, - RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS | - RADV_META_SAVE_RENDER); + RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_RENDER); radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport){.x = resolve_area.offset.x, @@ -905,8 +872,7 @@ radv_cmd_buffer_resolve_rendering_fs(struct radv_cmd_buffer *cmd_buffer, * Depth/stencil resolves for the current rendering. 
*/ void -radv_depth_stencil_resolve_rendering_fs(struct radv_cmd_buffer *cmd_buffer, - VkImageAspectFlags aspects, +radv_depth_stencil_resolve_rendering_fs(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlags aspects, VkResolveModeFlagBits resolve_mode) { const struct radv_rendering_state *render = &cmd_buffer->state.render; @@ -935,8 +901,7 @@ radv_depth_stencil_resolve_rendering_fs(struct radv_cmd_buffer *cmd_buffer, radv_decompress_resolve_src(cmd_buffer, src_image, src_layout, ®ion); radv_meta_save(&saved_state, cmd_buffer, - RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_DESCRIPTORS | - RADV_META_SAVE_RENDER); + RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_RENDER); struct radv_image_view *dst_iview = saved_state.render.ds_att.resolve_iview; @@ -961,10 +926,8 @@ radv_depth_stencil_resolve_rendering_fs(struct radv_cmd_buffer *cmd_buffer, .renderArea = saved_state.render.area, .layerCount = saved_state.render.layer_count, .viewMask = saved_state.render.view_mask, - .pDepthAttachment = (dst_iview->image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? - &depth_att : NULL, - .pStencilAttachment = (dst_iview->image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? - &stencil_att : NULL, + .pDepthAttachment = (dst_iview->image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? &depth_att : NULL, + .pStencilAttachment = (dst_iview->image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? &stencil_att : NULL, }; radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info); @@ -987,8 +950,8 @@ radv_depth_stencil_resolve_rendering_fs(struct radv_cmd_buffer *cmd_buffer, }, 0, NULL); - emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, dst_iview, &resolve_area.offset, - &resolve_area.extent, aspects, resolve_mode); + emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, dst_iview, &resolve_area.offset, &resolve_area.extent, aspects, + resolve_mode); radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer)); diff --git a/src/amd/vulkan/nir/radv_nir.h b/src/amd/vulkan/nir/radv_nir.h index 651d510..1599a62 100644 --- a/src/amd/vulkan/nir/radv_nir.h +++ b/src/amd/vulkan/nir/radv_nir.h @@ -43,27 +43,24 @@ struct radv_shader_args; struct radv_device; void radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device, - const struct radv_pipeline_layout *layout, - const struct radv_shader_info *info, + const struct radv_pipeline_layout *layout, const struct radv_shader_info *info, const struct radv_shader_args *args); -void radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level, - const struct radv_shader_info *info, const struct radv_shader_args *args, - const struct radv_pipeline_key *pl_key, uint32_t address32_hi); +void radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level, const struct radv_shader_info *info, + const struct radv_shader_args *args, const struct radv_pipeline_key *pl_key, + uint32_t address32_hi); bool radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device); bool radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_pipeline_stage *vs_stage, - const struct radv_pipeline_key *pl_key, - const struct radeon_info *rad_info); + const struct radv_pipeline_key *pl_key, const struct radeon_info *rad_info); bool radv_nir_lower_primitive_shading_rate(nir_shader *nir, enum amd_gfx_level gfx_level); bool radv_nir_lower_fs_intrinsics(nir_shader *nir, const struct radv_pipeline_stage *fs_stage, const struct radv_pipeline_key *key); -bool radv_nir_lower_fs_barycentric(nir_shader *shader, 
const struct radv_pipeline_key *key, - unsigned rast_prim); +bool radv_nir_lower_fs_barycentric(nir_shader *shader, const struct radv_pipeline_key *key, unsigned rast_prim); bool radv_nir_lower_intrinsics_early(nir_shader *nir, const struct radv_pipeline_key *key); diff --git a/src/amd/vulkan/nir/radv_nir_apply_pipeline_layout.c b/src/amd/vulkan/nir/radv_nir_apply_pipeline_layout.c index 1c4c4b7..76fba73 100644 --- a/src/amd/vulkan/nir/radv_nir_apply_pipeline_layout.c +++ b/src/amd/vulkan/nir/radv_nir_apply_pipeline_layout.c @@ -79,8 +79,8 @@ visit_vulkan_resource_index(nir_builder *b, apply_layout_state *state, nir_intri nir_ssa_def *set_ptr; if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) { - unsigned idx = state->pipeline_layout->set[desc_set].dynamic_offset_start + - layout->binding[binding].dynamic_offset_offset; + unsigned idx = + state->pipeline_layout->set[desc_set].dynamic_offset_start + layout->binding[binding].dynamic_offset_offset; set_ptr = get_scalar_arg(b, 1, state->args->ac.push_constants); offset = state->pipeline_layout->push_constant_size + idx * 16; stride = 16; @@ -99,15 +99,13 @@ visit_vulkan_resource_index(nir_builder *b, apply_layout_state *state, nir_intri assert(stride == 16); nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_pack_64_2x32_split(b, set_ptr, binding_ptr)); } else { - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_vec3(b, set_ptr, binding_ptr, nir_imm_int(b, stride))); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_vec3(b, set_ptr, binding_ptr, nir_imm_int(b, stride))); } nir_instr_remove(&intrin->instr); } static void -visit_vulkan_resource_reindex(nir_builder *b, apply_layout_state *state, - nir_intrinsic_instr *intrin) +visit_vulkan_resource_reindex(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin) { VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin); if (desc_type == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) { @@ -121,8 +119,7 @@ visit_vulkan_resource_reindex(nir_builder *b, apply_layout_state *state, nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_pack_64_2x32_split(b, set_ptr, binding_ptr)); } else { - assert(desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || - desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + assert(desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); nir_ssa_def *binding_ptr = nir_channel(b, intrin->src[0].ssa, 1); nir_ssa_def *stride = nir_channel(b, intrin->src[0].ssa, 2); @@ -132,8 +129,7 @@ visit_vulkan_resource_reindex(nir_builder *b, apply_layout_state *state, binding_ptr = nir_iadd_nuw(b, binding_ptr, index); - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_vector_insert_imm(b, intrin->src[0].ssa, binding_ptr, 1)); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_vector_insert_imm(b, intrin->src[0].ssa, binding_ptr, 1)); } nir_instr_remove(&intrin->instr); } @@ -142,16 +138,14 @@ static void visit_load_vulkan_descriptor(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin) { if (nir_intrinsic_desc_type(intrin) == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) { - nir_ssa_def *addr = - convert_pointer_to_64_bit(b, state, - nir_iadd(b, nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa), - nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa))); + nir_ssa_def *addr = convert_pointer_to_64_bit(b, state, + nir_iadd(b, nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa), + nir_unpack_64_2x32_split_y(b, 
intrin->src[0].ssa))); nir_ssa_def *desc = nir_build_load_global(b, 1, 64, addr, .access = ACCESS_NON_WRITEABLE); nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc); } else { - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_vector_insert_imm(b, intrin->src[0].ssa, nir_imm_int(b, 0), 2)); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_vector_insert_imm(b, intrin->src[0].ssa, nir_imm_int(b, 0), 2)); } nir_instr_remove(&intrin->instr); } @@ -159,27 +153,24 @@ visit_load_vulkan_descriptor(nir_builder *b, apply_layout_state *state, nir_intr static nir_ssa_def * load_inline_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *rsrc) { - uint32_t desc_type = - S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); + uint32_t desc_type = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); if (state->gfx_level >= GFX11) { - desc_type |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); + desc_type |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); } else if (state->gfx_level >= GFX10) { - desc_type |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1); + desc_type |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | + S_008F0C_RESOURCE_LEVEL(1); } else { - desc_type |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + desc_type |= + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); } - return nir_vec4(b, rsrc, nir_imm_int(b, S_008F04_BASE_ADDRESS_HI(state->address32_hi)), - nir_imm_int(b, 0xffffffff), nir_imm_int(b, desc_type)); + return nir_vec4(b, rsrc, nir_imm_int(b, S_008F04_BASE_ADDRESS_HI(state->address32_hi)), nir_imm_int(b, 0xffffffff), + nir_imm_int(b, desc_type)); } static nir_ssa_def * -load_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *rsrc, - unsigned access) +load_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *rsrc, unsigned access) { nir_binding binding = nir_chase_binding(nir_src_for_ssa(rsrc)); @@ -187,8 +178,7 @@ load_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *r * VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK. 
*/ if (binding.success) { - struct radv_descriptor_set_layout *layout = - state->pipeline_layout->set[binding.desc_set].layout; + struct radv_descriptor_set_layout *layout = state->pipeline_layout->set[binding.desc_set].layout; if (layout->binding[binding.binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) { rsrc = nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1)); return load_inline_buffer_descriptor(b, state, rsrc); @@ -212,9 +202,8 @@ visit_get_ssbo_size(nir_builder *b, apply_layout_state *state, nir_intrinsic_ins nir_ssa_def *ptr = nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1)); ptr = nir_iadd_imm(b, ptr, 8); ptr = convert_pointer_to_64_bit(b, state, ptr); - size = - nir_build_load_global(b, 4, 32, ptr, .access = ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER, - .align_mul = 16, .align_offset = 4); + size = nir_build_load_global(b, 4, 32, ptr, .access = ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER, .align_mul = 16, + .align_offset = 4); } else { /* load the entire descriptor so it can be CSE'd */ nir_ssa_def *ptr = convert_pointer_to_64_bit(b, state, nir_channel(b, rsrc, 0)); @@ -227,9 +216,8 @@ visit_get_ssbo_size(nir_builder *b, apply_layout_state *state, nir_intrinsic_ins } static nir_ssa_def * -get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *deref, - enum ac_descriptor_type desc_type, bool non_uniform, nir_tex_instr *tex, - bool write) +get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *deref, enum ac_descriptor_type desc_type, + bool non_uniform, nir_tex_instr *tex, bool write) { nir_variable *var = nir_deref_instr_get_variable(deref); assert(var); @@ -259,9 +247,8 @@ get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *der uint32_t dword0_mask = tex->op == nir_texop_tg4 ? C_008F30_TRUNC_COORD : 0xffffffffu; const uint32_t *samplers = radv_immutable_samplers(layout, binding); - return nir_imm_ivec4(b, samplers[constant_index * 4 + 0] & dword0_mask, - samplers[constant_index * 4 + 1], samplers[constant_index * 4 + 2], - samplers[constant_index * 4 + 3]); + return nir_imm_ivec4(b, samplers[constant_index * 4 + 0] & dword0_mask, samplers[constant_index * 4 + 1], + samplers[constant_index * 4 + 2], samplers[constant_index * 4 + 3]); } unsigned size = 8; @@ -322,8 +309,7 @@ get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *der * use the tail from plane 1 so that we can store only the first 16 bytes * of the last plane. */ if (desc_type == AC_DESC_PLANE_2) { - nir_ssa_def *desc2 = - get_sampler_desc(b, state, deref, AC_DESC_PLANE_1, non_uniform, tex, write); + nir_ssa_def *desc2 = get_sampler_desc(b, state, deref, AC_DESC_PLANE_1, non_uniform, tex, write); nir_ssa_def *comp[8]; for (unsigned i = 0; i < 4; i++) @@ -364,12 +350,11 @@ update_image_intrinsic(nir_builder *b, apply_layout_state *state, nir_intrinsic_ { nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); const enum glsl_sampler_dim dim = glsl_get_sampler_dim(deref->type); - bool is_load = intrin->intrinsic == nir_intrinsic_image_deref_load || - intrin->intrinsic == nir_intrinsic_image_deref_sparse_load; + bool is_load = + intrin->intrinsic == nir_intrinsic_image_deref_load || intrin->intrinsic == nir_intrinsic_image_deref_sparse_load; - nir_ssa_def *desc = get_sampler_desc( - b, state, deref, dim == GLSL_SAMPLER_DIM_BUF ? 
AC_DESC_BUFFER : AC_DESC_IMAGE, - nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM, NULL, !is_load); + nir_ssa_def *desc = get_sampler_desc(b, state, deref, dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE, + nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM, NULL, !is_load); if (intrin->intrinsic == nir_intrinsic_image_deref_descriptor_amd) { nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc); @@ -454,25 +439,20 @@ apply_layout_to_tex(nir_builder *b, apply_layout_state *state, nir_tex_instr *te if (plane >= 0) { assert(tex->op != nir_texop_txf_ms && tex->op != nir_texop_samples_identical); assert(tex->sampler_dim != GLSL_SAMPLER_DIM_BUF); - image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_PLANE_0 + plane, - tex->texture_non_uniform, tex, false); + image = + get_sampler_desc(b, state, texture_deref_instr, AC_DESC_PLANE_0 + plane, tex->texture_non_uniform, tex, false); } else if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) { - image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_BUFFER, - tex->texture_non_uniform, tex, false); + image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_BUFFER, tex->texture_non_uniform, tex, false); } else if (tex->op == nir_texop_fragment_mask_fetch_amd) { - image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_FMASK, - tex->texture_non_uniform, tex, false); + image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_FMASK, tex->texture_non_uniform, tex, false); } else { - image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_IMAGE, - tex->texture_non_uniform, tex, false); + image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_IMAGE, tex->texture_non_uniform, tex, false); } if (sampler_deref_instr) { - sampler = get_sampler_desc(b, state, sampler_deref_instr, AC_DESC_SAMPLER, - tex->sampler_non_uniform, tex, false); + sampler = get_sampler_desc(b, state, sampler_deref_instr, AC_DESC_SAMPLER, tex->sampler_non_uniform, tex, false); - if (state->disable_aniso_single_level && tex->sampler_dim < GLSL_SAMPLER_DIM_RECT && - state->gfx_level < GFX8) { + if (state->disable_aniso_single_level && tex->sampler_dim < GLSL_SAMPLER_DIM_RECT && state->gfx_level < GFX8) { /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL. * * GFX6-GFX7: @@ -519,8 +499,7 @@ apply_layout_to_tex(nir_builder *b, apply_layout_state *state, nir_tex_instr *te void radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device, - const struct radv_pipeline_layout *layout, - const struct radv_shader_info *info, + const struct radv_pipeline_layout *layout, const struct radv_shader_info *info, const struct radv_shader_args *args) { apply_layout_state state = { diff --git a/src/amd/vulkan/nir/radv_nir_lower_abi.c b/src/amd/vulkan/nir/radv_nir_lower_abi.c index 8b22edd..c394b98 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_abi.c +++ b/src/amd/vulkan/nir/radv_nir_lower_abi.c @@ -30,7 +30,7 @@ #include "radv_shader.h" #include "radv_shader_args.h" -#define GET_SGPR_FIELD_NIR(arg, field) \ +#define GET_SGPR_FIELD_NIR(arg, field) \ ac_nir_unpack_arg(b, &s->args->ac, arg, field##__SHIFT, util_bitcount(field##__MASK)) typedef struct { @@ -45,12 +45,11 @@ typedef struct { static nir_ssa_def * load_ring(nir_builder *b, unsigned ring, lower_abi_state *s) { - struct ac_arg arg = b->shader->info.stage == MESA_SHADER_TASK ? s->args->task_ring_offsets - : s->args->ac.ring_offsets; + struct ac_arg arg = + b->shader->info.stage == MESA_SHADER_TASK ? 
s->args->task_ring_offsets : s->args->ac.ring_offsets; nir_ssa_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, arg); - ring_offsets = - nir_pack_64_2x32_split(b, nir_channel(b, ring_offsets, 0), nir_channel(b, ring_offsets, 1)); + ring_offsets = nir_pack_64_2x32_split(b, nir_channel(b, ring_offsets, 0), nir_channel(b, ring_offsets, 1)); return nir_load_smem_amd(b, 4, ring_offsets, nir_imm_int(b, ring * 16u), .align_mul = 4u); } @@ -100,8 +99,7 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state) case nir_intrinsic_load_tcs_num_patches_amd: if (s->pl_key->dynamic_patch_control_points) { if (stage == MESA_SHADER_TESS_CTRL) { - replacement = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, - TCS_OFFCHIP_LAYOUT_NUM_PATCHES); + replacement = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_PATCHES); } else { replacement = ac_nir_load_arg(b, &s->args->ac, s->args->tes_num_patches); } @@ -129,8 +127,7 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state) replacement = load_ring(b, RING_PS_ATTR, s); /* Note, the HW always assumes there is at least 1 per-vertex param. */ - const unsigned total_num_params = - MAX2(1, s->info->outinfo.param_exports) + s->info->outinfo.prim_param_exports; + const unsigned total_num_params = MAX2(1, s->info->outinfo.param_exports) + s->info->outinfo.prim_param_exports; nir_ssa_def *dword1 = nir_channel(b, replacement, 1); dword1 = nir_ior_imm(b, dword1, S_008F04_STRIDE(16 * total_num_params)); @@ -139,16 +136,13 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state) case nir_intrinsic_load_ring_attr_offset_amd: { nir_ssa_def *ring_attr_offset = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_attr_offset); - replacement = - nir_ishl_imm(b, nir_ubfe_imm(b, ring_attr_offset, 0, 15), - 9); /* 512b increments. */ + replacement = nir_ishl_imm(b, nir_ubfe_imm(b, ring_attr_offset, 0, 15), 9); /* 512b increments. 
*/ break; } case nir_intrinsic_load_tess_rel_patch_id_amd: if (stage == MESA_SHADER_TESS_CTRL) { - replacement = nir_extract_u8(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_rel_ids), - nir_imm_int(b, 0)); + replacement = nir_extract_u8(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_rel_ids), nir_imm_int(b, 0)); } else if (stage == MESA_SHADER_TESS_EVAL) { /* Setting an upper bound like this will actually make it possible * to optimize some multiplications (in address calculations) so that @@ -156,8 +150,7 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state) */ nir_ssa_def *arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tes_rel_patch_id); nir_intrinsic_instr *load_arg = nir_instr_as_intrinsic(arg->parent_instr); - nir_intrinsic_set_arg_upper_bound_u32_amd( - load_arg, 2048 / MAX2(b->shader->info.tess.tcs_vertices_out, 1)); + nir_intrinsic_set_arg_upper_bound_u32_amd(load_arg, 2048 / MAX2(b->shader->info.tess.tcs_vertices_out, 1)); replacement = arg; } else { unreachable("invalid tessellation shader stage"); @@ -166,8 +159,7 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state) case nir_intrinsic_load_patch_vertices_in: if (stage == MESA_SHADER_TESS_CTRL) { if (s->pl_key->dynamic_patch_control_points) { - replacement = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, - TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS); + replacement = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS); } else { replacement = nir_imm_int(b, s->pl_key->tcs.tess_input_vertices); } @@ -177,16 +169,13 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state) unreachable("invalid tessellation shader stage"); break; case nir_intrinsic_load_gs_vertex_offset_amd: - replacement = - ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[nir_intrinsic_base(intrin)]); + replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[nir_intrinsic_base(intrin)]); break; case nir_intrinsic_load_workgroup_num_input_vertices_amd: - replacement = nir_ubfe_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), - 12, 9); + replacement = nir_ubfe_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 12, 9); break; case nir_intrinsic_load_workgroup_num_input_primitives_amd: - replacement = nir_ubfe_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), - 22, 9); + replacement = nir_ubfe_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 22, 9); break; case nir_intrinsic_load_packed_passthrough_primitive_amd: /* NGG passthrough mode: the HW already packs the primitive export value to a single register. @@ -214,12 +203,11 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state) * so the below is equivalent to: "ult(ubfe(gs_tg_info, 22, 9), 16)", but * ACO can optimize out the comparison to zero (see try_optimize_scc_nocompare). 
*/ - nir_ssa_def *small_workgroup = - nir_ieq_imm(b, nir_iand_imm(b, gs_tg_info, BITFIELD_RANGE(22 + 4, 9 - 4)), 0); + nir_ssa_def *small_workgroup = nir_ieq_imm(b, nir_iand_imm(b, gs_tg_info, BITFIELD_RANGE(22 + 4, 9 - 4)), 0); - nir_ssa_def *mask = nir_bcsel( - b, small_workgroup, nir_imm_int(b, radv_nggc_none), - nir_imm_int(b, radv_nggc_front_face | radv_nggc_back_face | radv_nggc_small_primitives)); + nir_ssa_def *mask = + nir_bcsel(b, small_workgroup, nir_imm_int(b, radv_nggc_none), + nir_imm_int(b, radv_nggc_front_face | radv_nggc_back_face | radv_nggc_small_primitives)); nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings); replacement = nir_ine_imm(b, nir_iand(b, settings, mask), 0); break; @@ -270,39 +258,36 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state) break; case nir_intrinsic_load_ring_mesh_scratch_offset_amd: /* gs_tg_info[0:11] is ordered_wave_id. Multiply by the ring entry size. */ - replacement = nir_imul_imm( - b, nir_iand_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 0xfff), - RADV_MESH_SCRATCH_ENTRY_BYTES); + replacement = nir_imul_imm(b, nir_iand_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 0xfff), + RADV_MESH_SCRATCH_ENTRY_BYTES); break; case nir_intrinsic_load_task_ring_entry_amd: replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.task_ring_entry); break; case nir_intrinsic_load_lshs_vertex_stride_amd: { - unsigned io_num = stage == MESA_SHADER_VERTEX ? s->info->vs.num_linked_outputs - : s->info->tcs.num_linked_inputs; + unsigned io_num = stage == MESA_SHADER_VERTEX ? s->info->vs.num_linked_outputs : s->info->tcs.num_linked_inputs; replacement = nir_imm_int(b, get_tcs_input_vertex_stride(io_num)); break; } case nir_intrinsic_load_esgs_vertex_stride_amd: { /* Emulate VGT_ESGS_RING_ITEMSIZE on GFX9+ to reduce context register writes. */ assert(s->gfx_level >= GFX9); - const unsigned stride = s->info->is_ngg ? s->info->ngg_info.vgt_esgs_ring_itemsize - : s->info->gs_ring_info.vgt_esgs_ring_itemsize; + const unsigned stride = + s->info->is_ngg ? s->info->ngg_info.vgt_esgs_ring_itemsize : s->info->gs_ring_info.vgt_esgs_ring_itemsize; replacement = nir_imm_int(b, stride); break; } case nir_intrinsic_load_hs_out_patch_data_offset_amd: { unsigned out_vertices_per_patch = b->shader->info.tess.tcs_vertices_out; - unsigned num_tcs_outputs = stage == MESA_SHADER_TESS_CTRL ? s->info->tcs.num_linked_outputs - : s->info->tes.num_linked_inputs; + unsigned num_tcs_outputs = + stage == MESA_SHADER_TESS_CTRL ? 
s->info->tcs.num_linked_outputs : s->info->tes.num_linked_inputs; int per_vertex_output_patch_size = out_vertices_per_patch * num_tcs_outputs * 16u; if (s->pl_key->dynamic_patch_control_points) { nir_ssa_def *num_patches; if (stage == MESA_SHADER_TESS_CTRL) { - num_patches = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, - TCS_OFFCHIP_LAYOUT_NUM_PATCHES); + num_patches = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_PATCHES); } else { num_patches = ac_nir_load_arg(b, &s->args->ac, s->args->tes_num_patches); } @@ -328,8 +313,8 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state) offset = nir_iadd(b, offset, nir_ishl_imm(b, intrin->src[1].ssa, 3)); } - replacement = nir_load_global_amd(b, 2, 32, addr, offset, .base = sample_pos_offset, - .access = ACCESS_NON_WRITEABLE); + replacement = + nir_load_global_amd(b, 2, 32, addr, offset, .base = sample_pos_offset, .access = ACCESS_NON_WRITEABLE); break; } case nir_intrinsic_load_rasterization_samples_amd: @@ -362,21 +347,18 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state) break; } case nir_intrinsic_atomic_add_gs_emit_prim_count_amd: - nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa, - nir_imm_int(b, RADV_NGG_QUERY_PIPELINE_STAT_OFFSET), + nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa, nir_imm_int(b, RADV_NGG_QUERY_PIPELINE_STAT_OFFSET), nir_imm_int(b, 0x100)); break; case nir_intrinsic_atomic_add_gen_prim_count_amd: - nir_gds_atomic_add_amd( - b, 32, intrin->src[0].ssa, - nir_imm_int(b, RADV_NGG_QUERY_PRIM_GEN_OFFSET(nir_intrinsic_stream_id(intrin))), - nir_imm_int(b, 0x100)); + nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa, + nir_imm_int(b, RADV_NGG_QUERY_PRIM_GEN_OFFSET(nir_intrinsic_stream_id(intrin))), + nir_imm_int(b, 0x100)); break; case nir_intrinsic_atomic_add_xfb_prim_count_amd: - nir_gds_atomic_add_amd( - b, 32, intrin->src[0].ssa, - nir_imm_int(b, RADV_NGG_QUERY_PRIM_XFB_OFFSET(nir_intrinsic_stream_id(intrin))), - nir_imm_int(b, 0x100)); + nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa, + nir_imm_int(b, RADV_NGG_QUERY_PRIM_XFB_OFFSET(nir_intrinsic_stream_id(intrin))), + nir_imm_int(b, 0x100)); break; case nir_intrinsic_atomic_add_gs_invocation_count_amd: /* TODO: add gs invocation query emulation. 
*/ @@ -389,15 +371,13 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state) replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.streamout_write_index); break; case nir_intrinsic_load_streamout_buffer_amd: { - nir_ssa_def *ptr = - nir_pack_64_2x32_split(b, ac_nir_load_arg(b, &s->args->ac, s->args->streamout_buffers), - nir_imm_int(b, s->address32_hi)); + nir_ssa_def *ptr = nir_pack_64_2x32_split(b, ac_nir_load_arg(b, &s->args->ac, s->args->streamout_buffers), + nir_imm_int(b, s->address32_hi)); replacement = nir_load_smem_amd(b, 4, ptr, nir_imm_int(b, nir_intrinsic_base(intrin) * 16)); break; } case nir_intrinsic_load_streamout_offset_amd: - replacement = - ac_nir_load_arg(b, &s->args->ac, s->args->ac.streamout_offset[nir_intrinsic_base(intrin)]); + replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.streamout_offset[nir_intrinsic_base(intrin)]); break; case nir_intrinsic_load_lds_ngg_gs_out_vertex_base_amd: @@ -464,10 +444,8 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state) } case nir_intrinsic_load_poly_line_smooth_enabled: if (s->pl_key->dynamic_line_rast_mode) { - nir_ssa_def *line_rast_mode = - GET_SGPR_FIELD_NIR(s->args->ps_state, PS_STATE_LINE_RAST_MODE); - replacement = - nir_ieq_imm(b, line_rast_mode, VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT); + nir_ssa_def *line_rast_mode = GET_SGPR_FIELD_NIR(s->args->ps_state, PS_STATE_LINE_RAST_MODE); + replacement = nir_ieq_imm(b, line_rast_mode, VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT); } else { replacement = nir_imm_bool(b, s->pl_key->ps.line_smooth_enabled); } @@ -516,22 +494,19 @@ load_gsvs_ring(nir_builder *b, lower_abi_state *s, unsigned stream_id) assert(stride < (1 << 14)); if (stream_offset) { - nir_ssa_def *addr = - nir_pack_64_2x32_split(b, nir_channel(b, ring, 0), nir_channel(b, ring, 1)); + nir_ssa_def *addr = nir_pack_64_2x32_split(b, nir_channel(b, ring, 0), nir_channel(b, ring, 1)); addr = nir_iadd_imm(b, addr, stream_offset); ring = nir_vector_insert_imm(b, ring, nir_unpack_64_2x32_split_x(b, addr), 0); ring = nir_vector_insert_imm(b, ring, nir_unpack_64_2x32_split_y(b, addr), 1); } - ring = nir_vector_insert_imm( - b, ring, nir_ior_imm(b, nir_channel(b, ring, 1), S_008F04_STRIDE(stride)), 1); + ring = nir_vector_insert_imm(b, ring, nir_ior_imm(b, nir_channel(b, ring, 1), S_008F04_STRIDE(stride)), 1); return nir_vector_insert_imm(b, ring, nir_imm_int(b, s->info->wave_size), 2); } void -radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level, - const struct radv_shader_info *info, const struct radv_shader_args *args, - const struct radv_pipeline_key *pl_key, uint32_t address32_hi) +radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level, const struct radv_shader_info *info, + const struct radv_shader_args *args, const struct radv_pipeline_key *pl_key, uint32_t address32_hi) { lower_abi_state state = { .gfx_level = gfx_level, @@ -552,6 +527,5 @@ radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level, state.gsvs_ring[i] = load_gsvs_ring(&b, &state, i); } - nir_shader_instructions_pass(shader, lower_abi_instr, - nir_metadata_dominance | nir_metadata_block_index, &state); + nir_shader_instructions_pass(shader, lower_abi_instr, nir_metadata_dominance | nir_metadata_block_index, &state); } diff --git a/src/amd/vulkan/nir/radv_nir_lower_fs_barycentric.c b/src/amd/vulkan/nir/radv_nir_lower_fs_barycentric.c index 87eba76..af98cc4 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_fs_barycentric.c +++ 
b/src/amd/vulkan/nir/radv_nir_lower_fs_barycentric.c @@ -37,12 +37,12 @@ lower_interp_center_smooth(nir_builder *b, nir_ssa_def *offset) { nir_ssa_def *pull_model = nir_load_barycentric_model(b, 32); - nir_ssa_def *deriv_x = nir_vec3(b, nir_fddx_fine(b, nir_channel(b, pull_model, 0)), - nir_fddx_fine(b, nir_channel(b, pull_model, 1)), - nir_fddx_fine(b, nir_channel(b, pull_model, 2))); - nir_ssa_def *deriv_y = nir_vec3(b, nir_fddy_fine(b, nir_channel(b, pull_model, 0)), - nir_fddy_fine(b, nir_channel(b, pull_model, 1)), - nir_fddy_fine(b, nir_channel(b, pull_model, 2))); + nir_ssa_def *deriv_x = + nir_vec3(b, nir_fddx_fine(b, nir_channel(b, pull_model, 0)), nir_fddx_fine(b, nir_channel(b, pull_model, 1)), + nir_fddx_fine(b, nir_channel(b, pull_model, 2))); + nir_ssa_def *deriv_y = + nir_vec3(b, nir_fddy_fine(b, nir_channel(b, pull_model, 0)), nir_fddy_fine(b, nir_channel(b, pull_model, 1)), + nir_fddy_fine(b, nir_channel(b, pull_model, 2))); nir_ssa_def *offset_x = nir_channel(b, offset, 0); nir_ssa_def *offset_y = nir_channel(b, offset, 1); @@ -68,8 +68,7 @@ lower_barycentric_coord_at_offset(nir_builder *b, nir_ssa_def *src, enum glsl_in } static nir_ssa_def * -lower_barycentric_coord_at_sample(nir_builder *b, lower_fs_barycentric_state *state, - nir_intrinsic_instr *intrin) +lower_barycentric_coord_at_sample(nir_builder *b, lower_fs_barycentric_state *state, nir_intrinsic_instr *intrin) { const enum glsl_interp_mode mode = (enum glsl_interp_mode)nir_intrinsic_interp_mode(intrin); nir_ssa_def *num_samples = nir_load_rasterization_samples_amd(b); @@ -80,13 +79,11 @@ lower_barycentric_coord_at_sample(nir_builder *b, lower_fs_barycentric_state *st nir_push_if(b, nir_ieq_imm(b, num_samples, 1)); { - res1 = nir_load_barycentric_pixel( - b, 32, .interp_mode = nir_intrinsic_interp_mode(intrin)); + res1 = nir_load_barycentric_pixel(b, 32, .interp_mode = nir_intrinsic_interp_mode(intrin)); } nir_push_else(b, NULL); { - nir_ssa_def *sample_pos = - nir_load_sample_positions_amd(b, 32, intrin->src[0].ssa, num_samples); + nir_ssa_def *sample_pos = nir_load_sample_positions_amd(b, 32, intrin->src[0].ssa, num_samples); /* sample_pos -= 0.5 */ sample_pos = nir_fadd_imm(b, sample_pos, -0.5f); @@ -98,11 +95,9 @@ lower_barycentric_coord_at_sample(nir_builder *b, lower_fs_barycentric_state *st new_dest = nir_if_phi(b, res1, res2); } else { if (!state->num_rasterization_samples) { - new_dest = nir_load_barycentric_pixel( - b, 32, .interp_mode = nir_intrinsic_interp_mode(intrin)); + new_dest = nir_load_barycentric_pixel(b, 32, .interp_mode = nir_intrinsic_interp_mode(intrin)); } else { - nir_ssa_def *sample_pos = - nir_load_sample_positions_amd(b, 32, intrin->src[0].ssa, num_samples); + nir_ssa_def *sample_pos = nir_load_sample_positions_amd(b, 32, intrin->src[0].ssa, num_samples); /* sample_pos -= 0.5 */ sample_pos = nir_fadd_imm(b, sample_pos, -0.5f); @@ -172,8 +167,7 @@ lower_triangle(nir_builder *b, nir_ssa_def *p1, nir_ssa_def *p2) */ nir_ssa_def *quad_id = nir_ushr_imm(b, nir_load_subgroup_invocation(b), 2); nir_ssa_def *provoking_vtx = nir_load_provoking_vtx_amd(b); - nir_ssa_def *provoking_vtx_id = - nir_ubfe(b, provoking_vtx, nir_ishl_imm(b, quad_id, 1), nir_imm_int(b, 2)); + nir_ssa_def *provoking_vtx_id = nir_ubfe(b, provoking_vtx, nir_ishl_imm(b, quad_id, 1), nir_imm_int(b, 2)); /* Compute barycentrics. 
*/ v0_bary[0] = nir_fsub(b, nir_fsub(b, nir_imm_float(b, 1.0f), p2), p1); @@ -198,8 +192,7 @@ lower_triangle(nir_builder *b, nir_ssa_def *p1, nir_ssa_def *p2) } static bool -lower_load_barycentric_coord(nir_builder *b, lower_fs_barycentric_state *state, - nir_intrinsic_instr *intrin) +lower_load_barycentric_coord(nir_builder *b, lower_fs_barycentric_state *state, nir_intrinsic_instr *intrin) { nir_ssa_def *interp, *p1, *p2; nir_ssa_def *new_dest; @@ -264,8 +257,7 @@ lower_load_barycentric_coord(nir_builder *b, lower_fs_barycentric_state *state, } bool -radv_nir_lower_fs_barycentric(nir_shader *shader, const struct radv_pipeline_key *key, - unsigned rast_prim) +radv_nir_lower_fs_barycentric(nir_shader *shader, const struct radv_pipeline_key *key, unsigned rast_prim) { nir_function_impl *impl = nir_shader_get_entrypoint(shader); bool progress = false; diff --git a/src/amd/vulkan/nir/radv_nir_lower_fs_intrinsics.c b/src/amd/vulkan/nir/radv_nir_lower_fs_intrinsics.c index 07423d6..170c5b3 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_fs_intrinsics.c +++ b/src/amd/vulkan/nir/radv_nir_lower_fs_intrinsics.c @@ -50,16 +50,14 @@ radv_nir_lower_fs_intrinsics(nir_shader *nir, const struct radv_pipeline_stage * switch (intrin->intrinsic) { case nir_intrinsic_load_sample_mask_in: { - nir_ssa_def *sample_coverage = - nir_load_vector_arg_amd(&b, 1, .base = args->ac.sample_coverage.arg_index); + nir_ssa_def *sample_coverage = nir_load_vector_arg_amd(&b, 1, .base = args->ac.sample_coverage.arg_index); nir_ssa_def *def = NULL; if (info->ps.uses_sample_shading || key->ps.sample_shading_enable) { /* gl_SampleMaskIn[0] = (SampleCoverage & (PsIterMask << gl_SampleID)). */ - nir_ssa_def *ps_state = - nir_load_scalar_arg_amd(&b, 1, .base = args->ps_state.arg_index); - nir_ssa_def *ps_iter_mask = nir_ubfe_imm(&b, ps_state, PS_STATE_PS_ITER_MASK__SHIFT, - util_bitcount(PS_STATE_PS_ITER_MASK__MASK)); + nir_ssa_def *ps_state = nir_load_scalar_arg_amd(&b, 1, .base = args->ps_state.arg_index); + nir_ssa_def *ps_iter_mask = + nir_ubfe_imm(&b, ps_state, PS_STATE_PS_ITER_MASK__SHIFT, util_bitcount(PS_STATE_PS_ITER_MASK__MASK)); nir_ssa_def *sample_id = nir_load_sample_id(&b); def = nir_iand(&b, sample_coverage, nir_ishl(&b, ps_iter_mask, sample_id)); } else { @@ -86,8 +84,7 @@ radv_nir_lower_fs_intrinsics(nir_shader *nir, const struct radv_pipeline_stage * adjusted_frag_z = nir_ffma_imm1(&b, adjusted_frag_z, 0.0625f, frag_z); /* VRS Rate X = Ancillary[2:3] */ - nir_ssa_def *ancillary = - nir_load_vector_arg_amd(&b, 1, .base = args->ac.ancillary.arg_index); + nir_ssa_def *ancillary = nir_load_vector_arg_amd(&b, 1, .base = args->ac.ancillary.arg_index); nir_ssa_def *x_rate = nir_ubfe_imm(&b, ancillary, 2, 2); /* xRate = xRate == 0x1 ? adjusted_frag_z : frag_z. 
*/ @@ -109,36 +106,32 @@ radv_nir_lower_fs_intrinsics(nir_shader *nir, const struct radv_pipeline_stage * nir_push_if(&b, nir_ieq_imm(&b, num_samples, 1)); { - res1 = nir_load_barycentric_pixel( - &b, 32, .interp_mode = nir_intrinsic_interp_mode(intrin)); + res1 = nir_load_barycentric_pixel(&b, 32, .interp_mode = nir_intrinsic_interp_mode(intrin)); } nir_push_else(&b, NULL); { - nir_ssa_def *sample_pos = - nir_load_sample_positions_amd(&b, 32, intrin->src[0].ssa, num_samples); + nir_ssa_def *sample_pos = nir_load_sample_positions_amd(&b, 32, intrin->src[0].ssa, num_samples); /* sample_pos -= 0.5 */ sample_pos = nir_fadd_imm(&b, sample_pos, -0.5f); - res2 = nir_load_barycentric_at_offset( - &b, 32, sample_pos, .interp_mode = nir_intrinsic_interp_mode(intrin)); + res2 = nir_load_barycentric_at_offset(&b, 32, sample_pos, + .interp_mode = nir_intrinsic_interp_mode(intrin)); } nir_pop_if(&b, NULL); new_dest = nir_if_phi(&b, res1, res2); } else { if (!key->ps.num_samples) { - new_dest = nir_load_barycentric_pixel( - &b, 32, .interp_mode = nir_intrinsic_interp_mode(intrin)); + new_dest = nir_load_barycentric_pixel(&b, 32, .interp_mode = nir_intrinsic_interp_mode(intrin)); } else { - nir_ssa_def *sample_pos = - nir_load_sample_positions_amd(&b, 32, intrin->src[0].ssa, num_samples); + nir_ssa_def *sample_pos = nir_load_sample_positions_amd(&b, 32, intrin->src[0].ssa, num_samples); /* sample_pos -= 0.5 */ sample_pos = nir_fadd_imm(&b, sample_pos, -0.5f); - new_dest = nir_load_barycentric_at_offset( - &b, 32, sample_pos, .interp_mode = nir_intrinsic_interp_mode(intrin)); + new_dest = nir_load_barycentric_at_offset(&b, 32, sample_pos, + .interp_mode = nir_intrinsic_interp_mode(intrin)); } } diff --git a/src/amd/vulkan/nir/radv_nir_lower_io.c b/src/amd/vulkan/nir/radv_nir_lower_io.c index e01b059..47ffdcf 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_io.c +++ b/src/amd/vulkan/nir/radv_nir_lower_io.c @@ -43,8 +43,7 @@ radv_nir_lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask) NIR_PASS(progress, nir, nir_lower_array_deref_of_vec, mask, nir_lower_direct_array_deref_of_vec_load | nir_lower_indirect_array_deref_of_vec_load | - nir_lower_direct_array_deref_of_vec_store | - nir_lower_indirect_array_deref_of_vec_store); + nir_lower_direct_array_deref_of_vec_store | nir_lower_indirect_array_deref_of_vec_store); NIR_PASS(progress, nir, nir_lower_io_to_scalar_early, mask); if (progress) { /* Optimize the new vector code and then remove dead vars */ @@ -69,8 +68,7 @@ radv_nir_lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask) NIR_PASS(_, nir, nir_opt_copy_prop_vars); NIR_PASS(_, nir, nir_opt_dce); - NIR_PASS(_, nir, nir_remove_dead_variables, - nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, NULL); + NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, NULL); } } @@ -83,8 +81,7 @@ radv_nir_lower_io(struct radv_device *device, nir_shader *nir) if (nir->info.stage == MESA_SHADER_VERTEX) { NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in, type_size_vec4, 0); - NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out, type_size_vec4, - nir_lower_io_lower_64bit_to_32); + NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32); } else { NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32); @@ -117,39 +114,35 @@ radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_pipeline_stage 
info->vs.tcs_temp_only_input_mask); return true; } else if (info->vs.as_es) { - NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, NULL, - device->physical_device->rad_info.gfx_level, info->esgs_itemsize); + NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, NULL, device->physical_device->rad_info.gfx_level, + info->esgs_itemsize); return true; } } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) { NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, NULL, info->vs.tcs_in_out_eq); - NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, NULL, - device->physical_device->rad_info.gfx_level, info->tcs.tes_reads_tess_factors, - info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read, - info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs, info->wave_size, - false, false, true); + NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, NULL, device->physical_device->rad_info.gfx_level, + info->tcs.tes_reads_tess_factors, info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read, + info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs, info->wave_size, false, false, true); return true; } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) { NIR_PASS_V(nir, ac_nir_lower_tes_inputs_to_mem, NULL); if (info->tes.as_es) { - NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, NULL, - device->physical_device->rad_info.gfx_level, info->esgs_itemsize); + NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, NULL, device->physical_device->rad_info.gfx_level, + info->esgs_itemsize); } return true; } else if (nir->info.stage == MESA_SHADER_GEOMETRY) { - NIR_PASS_V(nir, ac_nir_lower_gs_inputs_to_mem, NULL, - device->physical_device->rad_info.gfx_level, false); + NIR_PASS_V(nir, ac_nir_lower_gs_inputs_to_mem, NULL, device->physical_device->rad_info.gfx_level, false); return true; } else if (nir->info.stage == MESA_SHADER_TASK) { ac_nir_lower_task_outputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, device->physical_device->task_info.num_entries); return true; } else if (nir->info.stage == MESA_SHADER_MESH) { - ac_nir_lower_mesh_inputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, - device->physical_device->task_info.num_entries); + ac_nir_lower_mesh_inputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, device->physical_device->task_info.num_entries); return true; } diff --git a/src/amd/vulkan/nir/radv_nir_lower_primitive_shading_rate.c b/src/amd/vulkan/nir/radv_nir_lower_primitive_shading_rate.c index 8489efc..b137345 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_primitive_shading_rate.c +++ b/src/amd/vulkan/nir/radv_nir_lower_primitive_shading_rate.c @@ -47,8 +47,7 @@ radv_nir_lower_primitive_shading_rate(nir_shader *nir, enum amd_gfx_level gfx_le continue; nir_variable *var = nir_intrinsic_get_var(intr, 0); - if (var->data.mode != nir_var_shader_out || - var->data.location != VARYING_SLOT_PRIMITIVE_SHADING_RATE) + if (var->data.mode != nir_var_shader_out || var->data.location != VARYING_SLOT_PRIMITIVE_SHADING_RATE) continue; b.cursor = nir_before_instr(instr); @@ -91,8 +90,7 @@ radv_nir_lower_primitive_shading_rate(nir_shader *nir, enum amd_gfx_level gfx_le y_rate_shift += 26; } - out = nir_ior(&b, nir_ishl_imm(&b, x_rate, x_rate_shift), - nir_ishl_imm(&b, y_rate, y_rate_shift)); + out = nir_ior(&b, nir_ishl_imm(&b, x_rate, x_rate_shift), nir_ishl_imm(&b, y_rate, y_rate_shift)); nir_instr_rewrite_src(&intr->instr, &intr->src[1], nir_src_for_ssa(out)); diff --git a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c index 2c66385..d3938cf 100644 --- 
a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c +++ b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c @@ -44,8 +44,7 @@ typedef struct { } rq_variable; static rq_variable * -rq_variable_create(void *ctx, nir_shader *shader, unsigned array_length, - const struct glsl_type *type, const char *name) +rq_variable_create(void *ctx, nir_shader *shader, unsigned array_length, const struct glsl_type *type, const char *name) { rq_variable *result = ralloc(ctx, rq_variable); result->array_length = array_length; @@ -66,11 +65,9 @@ nir_load_array(nir_builder *b, nir_variable *array, nir_ssa_def *index) } static void -nir_store_array(nir_builder *b, nir_variable *array, nir_ssa_def *index, nir_ssa_def *value, - unsigned writemask) +nir_store_array(nir_builder *b, nir_variable *array, nir_ssa_def *index, nir_ssa_def *value, unsigned writemask) { - nir_store_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, array), index), value, - writemask); + nir_store_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, array), index), value, writemask); } static nir_deref_instr * @@ -92,8 +89,7 @@ rq_load_var(nir_builder *b, nir_ssa_def *index, rq_variable *var) } static void -rq_store_var(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def *value, - unsigned writemask) +rq_store_var(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def *value, unsigned writemask) { if (var->array_length == 1) { nir_store_var(b, var->variable, value, writemask); @@ -115,22 +111,19 @@ rq_load_array(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def return nir_load_array(b, var->variable, array_index); return nir_load_deref( - b, - nir_build_deref_array( - b, nir_build_deref_array(b, nir_build_deref_var(b, var->variable), index), array_index)); + b, nir_build_deref_array(b, nir_build_deref_array(b, nir_build_deref_var(b, var->variable), index), array_index)); } static void -rq_store_array(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def *array_index, - nir_ssa_def *value, unsigned writemask) +rq_store_array(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def *array_index, nir_ssa_def *value, + unsigned writemask) { if (var->array_length == 1) { nir_store_array(b, var->variable, array_index, value, writemask); } else { nir_store_deref( b, - nir_build_deref_array( - b, nir_build_deref_array(b, nir_build_deref_var(b, var->variable), index), array_index), + nir_build_deref_array(b, nir_build_deref_array(b, nir_build_deref_var(b, var->variable), index), array_index), value, writemask); } } @@ -181,105 +174,87 @@ struct ray_query_vars { uint32_t stack_entries; }; -#define VAR_NAME(name) \ - strcat(strcpy(ralloc_size(ctx, strlen(base_name) + strlen(name) + 1), base_name), name) +#define VAR_NAME(name) strcat(strcpy(ralloc_size(ctx, strlen(base_name) + strlen(name) + 1), base_name), name) static struct ray_query_traversal_vars -init_ray_query_traversal_vars(void *ctx, nir_shader *shader, unsigned array_length, - const char *base_name) +init_ray_query_traversal_vars(void *ctx, nir_shader *shader, unsigned array_length, const char *base_name) { struct ray_query_traversal_vars result; const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3); result.origin = rq_variable_create(ctx, shader, array_length, vec3_type, VAR_NAME("_origin")); - result.direction = - rq_variable_create(ctx, shader, array_length, vec3_type, VAR_NAME("_direction")); - - result.bvh_base = - rq_variable_create(ctx, shader, array_length, glsl_uint64_t_type(), 
VAR_NAME("_bvh_base")); - result.stack = - rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_stack")); - result.top_stack = - rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_top_stack")); - result.stack_low_watermark = rq_variable_create(ctx, shader, array_length, glsl_uint_type(), - VAR_NAME("_stack_low_watermark")); - result.current_node = - rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_current_node")); - result.previous_node = - rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_previous_node")); - result.instance_top_node = rq_variable_create(ctx, shader, array_length, glsl_uint_type(), - VAR_NAME("_instance_top_node")); - result.instance_bottom_node = rq_variable_create(ctx, shader, array_length, glsl_uint_type(), - VAR_NAME("_instance_bottom_node")); + result.direction = rq_variable_create(ctx, shader, array_length, vec3_type, VAR_NAME("_direction")); + + result.bvh_base = rq_variable_create(ctx, shader, array_length, glsl_uint64_t_type(), VAR_NAME("_bvh_base")); + result.stack = rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_stack")); + result.top_stack = rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_top_stack")); + result.stack_low_watermark = + rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_stack_low_watermark")); + result.current_node = rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_current_node")); + result.previous_node = rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_previous_node")); + result.instance_top_node = + rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_instance_top_node")); + result.instance_bottom_node = + rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_instance_bottom_node")); return result; } static struct ray_query_intersection_vars -init_ray_query_intersection_vars(void *ctx, nir_shader *shader, unsigned array_length, - const char *base_name) +init_ray_query_intersection_vars(void *ctx, nir_shader *shader, unsigned array_length, const char *base_name) { struct ray_query_intersection_vars result; const struct glsl_type *vec2_type = glsl_vector_type(GLSL_TYPE_FLOAT, 2); - result.primitive_id = - rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_primitive_id")); - result.geometry_id_and_flags = rq_variable_create(ctx, shader, array_length, glsl_uint_type(), - VAR_NAME("_geometry_id_and_flags")); - result.instance_addr = rq_variable_create(ctx, shader, array_length, glsl_uint64_t_type(), - VAR_NAME("_instance_addr")); - result.intersection_type = rq_variable_create(ctx, shader, array_length, glsl_uint_type(), - VAR_NAME("_intersection_type")); - result.opaque = - rq_variable_create(ctx, shader, array_length, glsl_bool_type(), VAR_NAME("_opaque")); - result.frontface = - rq_variable_create(ctx, shader, array_length, glsl_bool_type(), VAR_NAME("_frontface")); - result.sbt_offset_and_flags = rq_variable_create(ctx, shader, array_length, glsl_uint_type(), - VAR_NAME("_sbt_offset_and_flags")); - result.barycentrics = - rq_variable_create(ctx, shader, array_length, vec2_type, VAR_NAME("_barycentrics")); + result.primitive_id = rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_primitive_id")); + result.geometry_id_and_flags = + rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_geometry_id_and_flags")); + 
result.instance_addr = + rq_variable_create(ctx, shader, array_length, glsl_uint64_t_type(), VAR_NAME("_instance_addr")); + result.intersection_type = + rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_intersection_type")); + result.opaque = rq_variable_create(ctx, shader, array_length, glsl_bool_type(), VAR_NAME("_opaque")); + result.frontface = rq_variable_create(ctx, shader, array_length, glsl_bool_type(), VAR_NAME("_frontface")); + result.sbt_offset_and_flags = + rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_sbt_offset_and_flags")); + result.barycentrics = rq_variable_create(ctx, shader, array_length, vec2_type, VAR_NAME("_barycentrics")); result.t = rq_variable_create(ctx, shader, array_length, glsl_float_type(), VAR_NAME("_t")); return result; } static void -init_ray_query_vars(nir_shader *shader, unsigned array_length, struct ray_query_vars *dst, - const char *base_name, uint32_t max_shared_size) +init_ray_query_vars(nir_shader *shader, unsigned array_length, struct ray_query_vars *dst, const char *base_name, + uint32_t max_shared_size) { void *ctx = dst; const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3); - dst->root_bvh_base = rq_variable_create(dst, shader, array_length, glsl_uint64_t_type(), - VAR_NAME("_root_bvh_base")); + dst->root_bvh_base = rq_variable_create(dst, shader, array_length, glsl_uint64_t_type(), VAR_NAME("_root_bvh_base")); dst->flags = rq_variable_create(dst, shader, array_length, glsl_uint_type(), VAR_NAME("_flags")); - dst->cull_mask = - rq_variable_create(dst, shader, array_length, glsl_uint_type(), VAR_NAME("_cull_mask")); + dst->cull_mask = rq_variable_create(dst, shader, array_length, glsl_uint_type(), VAR_NAME("_cull_mask")); dst->origin = rq_variable_create(dst, shader, array_length, vec3_type, VAR_NAME("_origin")); dst->tmin = rq_variable_create(dst, shader, array_length, glsl_float_type(), VAR_NAME("_tmin")); - dst->direction = - rq_variable_create(dst, shader, array_length, vec3_type, VAR_NAME("_direction")); + dst->direction = rq_variable_create(dst, shader, array_length, vec3_type, VAR_NAME("_direction")); - dst->incomplete = - rq_variable_create(dst, shader, array_length, glsl_bool_type(), VAR_NAME("_incomplete")); + dst->incomplete = rq_variable_create(dst, shader, array_length, glsl_bool_type(), VAR_NAME("_incomplete")); dst->closest = init_ray_query_intersection_vars(dst, shader, array_length, VAR_NAME("_closest")); - dst->candidate = - init_ray_query_intersection_vars(dst, shader, array_length, VAR_NAME("_candidate")); + dst->candidate = init_ray_query_intersection_vars(dst, shader, array_length, VAR_NAME("_candidate")); dst->trav = init_ray_query_traversal_vars(dst, shader, array_length, VAR_NAME("_top")); - uint32_t workgroup_size = shader->info.workgroup_size[0] * shader->info.workgroup_size[1] * - shader->info.workgroup_size[2]; + uint32_t workgroup_size = + shader->info.workgroup_size[0] * shader->info.workgroup_size[1] * shader->info.workgroup_size[2]; uint32_t shared_stack_entries = shader->info.ray_queries == 1 ? 
16 : 8; uint32_t shared_stack_size = workgroup_size * shared_stack_entries * 4; uint32_t shared_offset = align(shader->info.shared_size, 4); if (shader->info.stage != MESA_SHADER_COMPUTE || array_length > 1 || shared_offset + shared_stack_size > max_shared_size) { - dst->stack = rq_variable_create( - dst, shader, array_length, - glsl_array_type(glsl_uint_type(), MAX_SCRATCH_STACK_ENTRY_COUNT, 0), VAR_NAME("_stack")); + dst->stack = + rq_variable_create(dst, shader, array_length, + glsl_array_type(glsl_uint_type(), MAX_SCRATCH_STACK_ENTRY_COUNT, 0), VAR_NAME("_stack")); dst->stack_entries = MAX_SCRATCH_STACK_ENTRY_COUNT; } else { dst->stack = NULL; @@ -293,8 +268,7 @@ init_ray_query_vars(nir_shader *shader, unsigned array_length, struct ray_query_ #undef VAR_NAME static void -lower_ray_query(nir_shader *shader, nir_variable *ray_query, struct hash_table *ht, - uint32_t max_shared_size) +lower_ray_query(nir_shader *shader, nir_variable *ray_query, struct hash_table *ht, uint32_t max_shared_size) { struct ray_query_vars *vars = ralloc(ht, struct ray_query_vars); @@ -302,8 +276,7 @@ lower_ray_query(nir_shader *shader, nir_variable *ray_query, struct hash_table * if (glsl_type_is_array(ray_query->type)) array_length = glsl_get_length(ray_query->type); - init_ray_query_vars(shader, array_length, vars, ray_query->name == NULL ? "" : ray_query->name, - max_shared_size); + init_ray_query_vars(shader, array_length, vars, ray_query->name == NULL ? "" : ray_query->name, max_shared_size); _mesa_hash_table_insert(ht, ray_query, vars); } @@ -312,14 +285,12 @@ static void copy_candidate_to_closest(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars) { rq_copy_var(b, index, vars->closest.barycentrics, vars->candidate.barycentrics, 0x3); - rq_copy_var(b, index, vars->closest.geometry_id_and_flags, vars->candidate.geometry_id_and_flags, - 0x1); + rq_copy_var(b, index, vars->closest.geometry_id_and_flags, vars->candidate.geometry_id_and_flags, 0x1); rq_copy_var(b, index, vars->closest.instance_addr, vars->candidate.instance_addr, 0x1); rq_copy_var(b, index, vars->closest.intersection_type, vars->candidate.intersection_type, 0x1); rq_copy_var(b, index, vars->closest.opaque, vars->candidate.opaque, 0x1); rq_copy_var(b, index, vars->closest.frontface, vars->candidate.frontface, 0x1); - rq_copy_var(b, index, vars->closest.sbt_offset_and_flags, vars->candidate.sbt_offset_and_flags, - 0x1); + rq_copy_var(b, index, vars->closest.sbt_offset_and_flags, vars->candidate.sbt_offset_and_flags, 0x1); rq_copy_var(b, index, vars->closest.primitive_id, vars->candidate.primitive_id, 0x1); rq_copy_var(b, index, vars->closest.t, vars->candidate.t, 0x1); } @@ -332,8 +303,8 @@ insert_terminate_on_first_hit(nir_builder *b, nir_ssa_def *index, struct ray_que if (ray_flags) terminate_on_first_hit = ray_flags->terminate_on_first_hit; else - terminate_on_first_hit = nir_test_mask(b, rq_load_var(b, index, vars->flags), - SpvRayFlagsTerminateOnFirstHitKHRMask); + terminate_on_first_hit = + nir_test_mask(b, rq_load_var(b, index, vars->flags), SpvRayFlagsTerminateOnFirstHitKHRMask); nir_push_if(b, terminate_on_first_hit); { rq_store_var(b, index, vars->incomplete, nir_imm_false(b), 0x1); @@ -365,15 +336,11 @@ lower_rq_generate_intersection(nir_builder *b, nir_ssa_def *index, nir_intrinsic nir_pop_if(b, NULL); } -enum rq_intersection_type { - intersection_type_none, - intersection_type_triangle, - intersection_type_aabb -}; +enum rq_intersection_type { intersection_type_none, intersection_type_triangle, intersection_type_aabb }; 
static void -lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, - struct ray_query_vars *vars, struct radv_instance *instance) +lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars, + struct radv_instance *instance) { rq_store_var(b, index, vars->flags, instr->src[2].ssa, 0x1); rq_store_var(b, index, vars->cull_mask, nir_ishl_imm(b, instr->src[3].ssa, 24), 0x1); @@ -387,14 +354,12 @@ lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *ins rq_store_var(b, index, vars->trav.direction, instr->src[6].ssa, 0x7); rq_store_var(b, index, vars->closest.t, instr->src[7].ssa, 0x1); - rq_store_var(b, index, vars->closest.intersection_type, nir_imm_int(b, intersection_type_none), - 0x1); + rq_store_var(b, index, vars->closest.intersection_type, nir_imm_int(b, intersection_type_none), 0x1); nir_ssa_def *accel_struct = instr->src[1].ssa; nir_ssa_def *bvh_offset = nir_build_load_global( - b, 1, 32, - nir_iadd_imm(b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)), + b, 1, 32, nir_iadd_imm(b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)), .access = ACCESS_NON_WRITEABLE); nir_ssa_def *bvh_base = nir_iadd(b, accel_struct, nir_u2u64(b, bvh_offset)); bvh_base = build_addr_to_node(b, bvh_base); @@ -406,8 +371,7 @@ lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *ins rq_store_var(b, index, vars->trav.stack, nir_imm_int(b, 0), 0x1); rq_store_var(b, index, vars->trav.stack_low_watermark, nir_imm_int(b, 0), 0x1); } else { - nir_ssa_def *base_offset = - nir_imul_imm(b, nir_load_local_invocation_index(b), sizeof(uint32_t)); + nir_ssa_def *base_offset = nir_imul_imm(b, nir_load_local_invocation_index(b), sizeof(uint32_t)); base_offset = nir_iadd_imm(b, base_offset, vars->shared_base); rq_store_var(b, index, vars->trav.stack, base_offset, 0x1); rq_store_var(b, index, vars->trav.stack_low_watermark, base_offset, 0x1); @@ -416,18 +380,15 @@ lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *ins rq_store_var(b, index, vars->trav.current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE), 0x1); rq_store_var(b, index, vars->trav.previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1); rq_store_var(b, index, vars->trav.instance_top_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1); - rq_store_var(b, index, vars->trav.instance_bottom_node, - nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT), 0x1); + rq_store_var(b, index, vars->trav.instance_bottom_node, nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT), 0x1); rq_store_var(b, index, vars->trav.top_stack, nir_imm_int(b, -1), 1); - rq_store_var(b, index, vars->incomplete, - nir_imm_bool(b, !(instance->debug_flags & RADV_DEBUG_NO_RT)), 0x1); + rq_store_var(b, index, vars->incomplete, nir_imm_bool(b, !(instance->debug_flags & RADV_DEBUG_NO_RT)), 0x1); } static nir_ssa_def * -lower_rq_load(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, - struct ray_query_vars *vars) +lower_rq_load(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars) { bool committed = nir_intrinsic_committed(instr); struct ray_query_intersection_vars *intersection = committed ? 
&vars->closest : &vars->candidate; @@ -442,26 +403,24 @@ lower_rq_load(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, return rq_load_var(b, index, intersection->barycentrics); case nir_ray_query_value_intersection_candidate_aabb_opaque: return nir_iand(b, rq_load_var(b, index, vars->candidate.opaque), - nir_ieq_imm(b, rq_load_var(b, index, vars->candidate.intersection_type), - intersection_type_aabb)); + nir_ieq_imm(b, rq_load_var(b, index, vars->candidate.intersection_type), intersection_type_aabb)); case nir_ray_query_value_intersection_front_face: return rq_load_var(b, index, intersection->frontface); case nir_ray_query_value_intersection_geometry_index: return nir_iand_imm(b, rq_load_var(b, index, intersection->geometry_id_and_flags), 0xFFFFFF); case nir_ray_query_value_intersection_instance_custom_index: { nir_ssa_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr); - return nir_iand_imm(b, - nir_build_load_global(b, 1, 32, - nir_iadd_imm(b, instance_node_addr, - offsetof(struct radv_bvh_instance_node, - custom_instance_and_mask))), - 0xFFFFFF); + return nir_iand_imm( + b, + nir_build_load_global( + b, 1, 32, + nir_iadd_imm(b, instance_node_addr, offsetof(struct radv_bvh_instance_node, custom_instance_and_mask))), + 0xFFFFFF); } case nir_ray_query_value_intersection_instance_id: { nir_ssa_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr); return nir_build_load_global( - b, 1, 32, - nir_iadd_imm(b, instance_node_addr, offsetof(struct radv_bvh_instance_node, instance_id))); + b, 1, 32, nir_iadd_imm(b, instance_node_addr, offsetof(struct radv_bvh_instance_node, instance_id))); } case nir_ray_query_value_intersection_instance_sbt_index: return nir_iand_imm(b, rq_load_var(b, index, intersection->sbt_offset_and_flags), 0xFFFFFF); @@ -483,8 +442,7 @@ lower_rq_load(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, for (unsigned r = 0; r < 3; ++r) rows[r] = nir_build_load_global( b, 4, 32, - nir_iadd_imm(b, instance_node_addr, - offsetof(struct radv_bvh_instance_node, otw_matrix) + r * 16)); + nir_iadd_imm(b, instance_node_addr, offsetof(struct radv_bvh_instance_node, otw_matrix) + r * 16)); return nir_vec3(b, nir_channel(b, rows[0], column), nir_channel(b, rows[1], column), nir_channel(b, rows[2], column)); @@ -539,19 +497,16 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio nir_ssa_def *index = data->index; rq_store_var(b, index, vars->candidate.primitive_id, intersection->primitive_id, 1); - rq_store_var(b, index, vars->candidate.geometry_id_and_flags, - intersection->geometry_id_and_flags, 1); + rq_store_var(b, index, vars->candidate.geometry_id_and_flags, intersection->geometry_id_and_flags, 1); rq_store_var(b, index, vars->candidate.opaque, intersection->opaque, 0x1); - rq_store_var(b, index, vars->candidate.intersection_type, nir_imm_int(b, intersection_type_aabb), - 0x1); + rq_store_var(b, index, vars->candidate.intersection_type, nir_imm_int(b, intersection_type_aabb), 0x1); nir_jump(b, nir_jump_break); } static void handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *intersection, - const struct radv_ray_traversal_args *args, - const struct radv_ray_flags *ray_flags) + const struct radv_ray_traversal_args *args, const struct radv_ray_flags *ray_flags) { struct traversal_data *data = args->data; struct ray_query_vars *vars = data->vars; @@ -559,13 +514,11 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int 
rq_store_var(b, index, vars->candidate.barycentrics, intersection->barycentrics, 3); rq_store_var(b, index, vars->candidate.primitive_id, intersection->base.primitive_id, 1); - rq_store_var(b, index, vars->candidate.geometry_id_and_flags, - intersection->base.geometry_id_and_flags, 1); + rq_store_var(b, index, vars->candidate.geometry_id_and_flags, intersection->base.geometry_id_and_flags, 1); rq_store_var(b, index, vars->candidate.t, intersection->t, 0x1); rq_store_var(b, index, vars->candidate.opaque, intersection->base.opaque, 0x1); rq_store_var(b, index, vars->candidate.frontface, intersection->frontface, 0x1); - rq_store_var(b, index, vars->candidate.intersection_type, - nir_imm_int(b, intersection_type_triangle), 0x1); + rq_store_var(b, index, vars->candidate.intersection_type, nir_imm_int(b, intersection_type_triangle), 0x1); nir_push_if(b, intersection->base.opaque); { @@ -580,8 +533,7 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int } static void -store_stack_entry(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value, - const struct radv_ray_traversal_args *args) +store_stack_entry(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value, const struct radv_ray_traversal_args *args) { struct traversal_data *data = args->data; if (data->vars->stack) @@ -601,11 +553,9 @@ load_stack_entry(nir_builder *b, nir_ssa_def *index, const struct radv_ray_trave } static nir_ssa_def * -lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars, - struct radv_device *device) +lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars, struct radv_device *device) { - nir_variable *inv_dir = - nir_local_variable_create(b->impl, glsl_vector_type(GLSL_TYPE_FLOAT, 3), "inv_dir"); + nir_variable *inv_dir = nir_local_variable_create(b->impl, glsl_vector_type(GLSL_TYPE_FLOAT, 3), "inv_dir"); nir_store_var(b, inv_dir, nir_frcp(b, rq_load_var(b, index, vars->trav.direction)), 0x7); struct radv_ray_traversal_vars trav_vars = { @@ -650,9 +600,8 @@ lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars args.stack_stride = 1; args.stack_base = 0; } else { - uint32_t workgroup_size = b->shader->info.workgroup_size[0] * - b->shader->info.workgroup_size[1] * - b->shader->info.workgroup_size[2]; + uint32_t workgroup_size = + b->shader->info.workgroup_size[0] * b->shader->info.workgroup_size[1] * b->shader->info.workgroup_size[2]; args.stack_stride = workgroup_size * 4; args.stack_base = vars->shared_base; } @@ -660,8 +609,7 @@ lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars nir_push_if(b, rq_load_var(b, index, vars->incomplete)); { nir_ssa_def *incomplete = radv_build_ray_traversal(device, b, &args); - rq_store_var(b, index, vars->incomplete, - nir_iand(b, rq_load_var(b, index, vars->incomplete), incomplete), 1); + rq_store_var(b, index, vars->incomplete, nir_iand(b, rq_load_var(b, index, vars->incomplete), incomplete), 1); } nir_pop_if(b, NULL); @@ -669,8 +617,7 @@ lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars } static void -lower_rq_terminate(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, - struct ray_query_vars *vars) +lower_rq_terminate(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars) { rq_store_var(b, index, vars->incomplete, nir_imm_false(b), 0x1); } @@ -716,8 +663,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device if 
(!nir_intrinsic_is_ray_query(intrinsic->intrinsic)) continue; - nir_deref_instr *ray_query_deref = - nir_instr_as_deref(intrinsic->src[0].ssa->parent_instr); + nir_deref_instr *ray_query_deref = nir_instr_as_deref(intrinsic->src[0].ssa->parent_instr); nir_ssa_def *index = NULL; if (ray_query_deref->deref_type == nir_deref_type_array) { @@ -728,8 +674,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device assert(ray_query_deref->deref_type == nir_deref_type_var); struct ray_query_vars *vars = - (struct ray_query_vars *)_mesa_hash_table_search(query_ht, ray_query_deref->var) - ->data; + (struct ray_query_vars *)_mesa_hash_table_search(query_ht, ray_query_deref->var)->data; builder.cursor = nir_before_instr(instr); diff --git a/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c b/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c index 8f0707b..44770af 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c +++ b/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c @@ -38,8 +38,7 @@ typedef struct { } lower_vs_inputs_state; static nir_ssa_def * -lower_load_vs_input_from_prolog(nir_builder *b, nir_intrinsic_instr *intrin, - lower_vs_inputs_state *s) +lower_load_vs_input_from_prolog(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs_state *s) { nir_src *offset_src = nir_get_io_offset_src(intrin); assert(nir_src_is_const(*offset_src)); @@ -57,8 +56,7 @@ lower_load_vs_input_from_prolog(nir_builder *b, nir_intrinsic_instr *intrin, const unsigned arg_bit_size = MAX2(bit_size, 32); unsigned num_input_args = 1; - nir_ssa_def *input_args[2] = { - ac_nir_load_arg(b, &s->args->ac, s->args->vs_inputs[driver_location]), NULL}; + nir_ssa_def *input_args[2] = {ac_nir_load_arg(b, &s->args->ac, s->args->vs_inputs[driver_location]), NULL}; if (component * 32 + arg_bit_size * num_components > 128) { assert(bit_size == 64); @@ -115,8 +113,7 @@ can_use_untyped_load(const struct util_format_description *f, const unsigned bit } static nir_ssa_def * -oob_input_load_value(nir_builder *b, const unsigned channel_idx, const unsigned bit_size, - const bool is_float) +oob_input_load_value(nir_builder *b, const unsigned channel_idx, const unsigned bit_size, const bool is_float) { /* 22.1.1. Attribute Location and Component Assignment of Vulkan 1.3 specification: * For 64-bit data types, no default attribute values are provided. Input variables @@ -136,8 +133,7 @@ oob_input_load_value(nir_builder *b, const unsigned channel_idx, const unsigned } static unsigned -count_format_bytes(const struct util_format_description *f, const unsigned first_channel, - const unsigned num_channels) +count_format_bytes(const struct util_format_description *f, const unsigned first_channel, const unsigned num_channels) { if (!num_channels) return 0; @@ -165,8 +161,7 @@ format_needs_swizzle(const struct util_format_description *f) } static unsigned -first_used_swizzled_channel(const struct util_format_description *f, const unsigned mask, - const bool backwards) +first_used_swizzled_channel(const struct util_format_description *f, const unsigned mask, const bool backwards) { unsigned first_used = backwards ? 
0 : f->nr_channels; const unsigned it_mask = mask & BITFIELD_MASK(f->nr_channels); @@ -181,8 +176,7 @@ first_used_swizzled_channel(const struct util_format_description *f, const unsig } static nir_ssa_def * -adjust_vertex_fetch_alpha(nir_builder *b, enum ac_vs_input_alpha_adjust alpha_adjust, - nir_ssa_def *alpha) +adjust_vertex_fetch_alpha(nir_builder *b, enum ac_vs_input_alpha_adjust alpha_adjust, nir_ssa_def *alpha) { if (alpha_adjust == AC_ALPHA_ADJUST_SSCALED) alpha = nir_f2u32(b, alpha); @@ -247,16 +241,13 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs const struct util_format_description *f = util_format_description(attrib_format); const struct ac_vtx_format_info *vtx_info = ac_get_vtx_format_info(s->rad_info->gfx_level, s->rad_info->family, attrib_format); - const unsigned binding_index = - s->info->vs.use_per_attribute_vb_descs ? location : attrib_binding; - const unsigned desc_index = - util_bitcount(s->info->vs.vb_desc_usage_mask & u_bit_consecutive(0, binding_index)); + const unsigned binding_index = s->info->vs.use_per_attribute_vb_descs ? location : attrib_binding; + const unsigned desc_index = util_bitcount(s->info->vs.vb_desc_usage_mask & u_bit_consecutive(0, binding_index)); nir_ssa_def *vertex_buffers_arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.vertex_buffers); nir_ssa_def *vertex_buffers = nir_pack_64_2x32_split(b, vertex_buffers_arg, nir_imm_int(b, s->rad_info->address32_hi)); - nir_ssa_def *descriptor = - nir_load_smem_amd(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16)); + nir_ssa_def *descriptor = nir_load_smem_amd(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16)); nir_ssa_def *base_index = calc_vs_input_index(b, location, s); nir_ssa_def *zero = nir_imm_int(b, 0); @@ -283,8 +274,7 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs * Don't shrink the format here because this might allow the backend to * emit fewer (but larger than needed) HW instructions. */ - const unsigned first_trailing_unused_channel = - first_used_swizzled_channel(f, dest_use_mask, true) + 1; + const unsigned first_trailing_unused_channel = first_used_swizzled_channel(f, dest_use_mask, true) + 1; const unsigned max_loaded_channels = MIN2(first_trailing_unused_channel, f->nr_channels); const unsigned fetch_num_channels = first_used_channel >= max_loaded_channels ? 0 : max_loaded_channels - skipped_start; @@ -320,17 +310,15 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs * Note, NONE seems to occur in real use and is considered an array format. */ if (f->is_array && fetch_format != PIPE_FORMAT_NONE) { - while (channels > 1 && attrib_stride && - (const_off + count_format_bytes(f, start, channels)) > attrib_stride) { + while (channels > 1 && attrib_stride && (const_off + count_format_bytes(f, start, channels)) > attrib_stride) { channels--; } /* Keep the fetch format as large as possible to let the backend emit * larger load instructions when it deems them beneficial. 
*/ - fetch_format = - util_format_get_array(f->channel[0].type, f->channel[0].size, f->nr_channels - start, - f->is_unorm || f->is_snorm, f->channel[0].pure_integer); + fetch_format = util_format_get_array(f->channel[0].type, f->channel[0].size, f->nr_channels - start, + f->is_unorm || f->is_snorm, f->channel[0].pure_integer); } assert(f->is_array || channels == fetch_num_channels); @@ -339,17 +327,15 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs * Typed loads can cause GPU hangs when used with improper alignment. */ if (can_use_untyped_load(f, bit_size)) { - loads[num_loads++] = - nir_load_buffer_amd(b, channels, bit_size, descriptor, zero, zero, index, - .base = const_off, .memory_modes = nir_var_shader_in); + loads[num_loads++] = nir_load_buffer_amd(b, channels, bit_size, descriptor, zero, zero, index, + .base = const_off, .memory_modes = nir_var_shader_in); } else { const unsigned align_mul = MAX2(1, s->pl_key->vs.vertex_binding_align[attrib_binding]); const unsigned align_offset = const_off % align_mul; loads[num_loads++] = nir_load_typed_buffer_amd( - b, channels, bit_size, descriptor, zero, zero, index, .base = const_off, - .format = fetch_format, .align_mul = align_mul, .align_offset = align_offset, - .memory_modes = nir_var_shader_in); + b, channels, bit_size, descriptor, zero, zero, index, .base = const_off, .format = fetch_format, + .align_mul = align_mul, .align_offset = align_offset, .memory_modes = nir_var_shader_in); } } @@ -363,9 +349,8 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs max_loaded_channels - first_used_channel, bit_size); /* Return early if possible to avoid generating unnecessary IR. */ - if (num_loads > 0 && first_used_channel == component && - load->num_components == dest_num_components && !needs_swizzle && - alpha_adjust == AC_ALPHA_ADJUST_NONE) + if (num_loads > 0 && first_used_channel == component && load->num_components == dest_num_components && + !needs_swizzle && alpha_adjust == AC_ALPHA_ADJUST_NONE) return load; /* Fill unused and OOB components. 
@@ -443,6 +428,6 @@ radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_pipeline_stage *v .rad_info = rad_info, }; - return nir_shader_instructions_pass(shader, lower_vs_input_instr, - nir_metadata_dominance | nir_metadata_block_index, &state); + return nir_shader_instructions_pass(shader, lower_vs_input_instr, nir_metadata_dominance | nir_metadata_block_index, + &state); } diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c index 3118464..31f9619 100644 --- a/src/amd/vulkan/radv_acceleration_structure.c +++ b/src/amd/vulkan/radv_acceleration_structure.c @@ -23,9 +23,9 @@ #include "radv_private.h" +#include "meta/radv_meta.h" #include "nir_builder.h" #include "radv_cs.h" -#include "meta/radv_meta.h" #include "radix_sort/radv_radix_sort.h" @@ -170,8 +170,7 @@ get_build_layout(struct radv_device *device, uint32_t leaf_count, } if (accel_struct) { - uint64_t bvh_size = - bvh_leaf_size * leaf_count + sizeof(struct radv_bvh_box32_node) * internal_count; + uint64_t bvh_size = bvh_leaf_size * leaf_count + sizeof(struct radv_bvh_box32_node) * internal_count; uint32_t offset = 0; offset += sizeof(struct radv_accel_struct_header); @@ -197,8 +196,8 @@ get_build_layout(struct radv_device *device, uint32_t leaf_count, 0, }; if (radv_device_init_accel_struct_build_state(device) == VK_SUCCESS) - radix_sort_vk_get_memory_requirements(device->meta_state.accel_struct_build.radix_sort, - leaf_count, &requirements); + radix_sort_vk_get_memory_requirements(device->meta_state.accel_struct_build.radix_sort, leaf_count, + &requirements); uint32_t offset = 0; @@ -208,8 +207,7 @@ get_build_layout(struct radv_device *device, uint32_t leaf_count, struct build_config config = build_config(leaf_count, build_info); if (config.internal_type == INTERNAL_BUILD_TYPE_PLOC) - ploc_scratch_space = DIV_ROUND_UP(leaf_count, PLOC_WORKGROUP_SIZE) * - sizeof(struct ploc_prefix_scan_partition); + ploc_scratch_space = DIV_ROUND_UP(leaf_count, PLOC_WORKGROUP_SIZE) * sizeof(struct ploc_prefix_scan_partition); else lbvh_node_space = sizeof(struct lbvh_node_info) * internal_count; @@ -238,10 +236,10 @@ get_build_layout(struct radv_device *device, uint32_t leaf_count, } VKAPI_ATTR void VKAPI_CALL -radv_GetAccelerationStructureBuildSizesKHR( - VkDevice _device, VkAccelerationStructureBuildTypeKHR buildType, - const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo, - const uint32_t *pMaxPrimitiveCounts, VkAccelerationStructureBuildSizesInfoKHR *pSizeInfo) +radv_GetAccelerationStructureBuildSizesKHR(VkDevice _device, VkAccelerationStructureBuildTypeKHR buildType, + const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo, + const uint32_t *pMaxPrimitiveCounts, + VkAccelerationStructureBuildSizesInfoKHR *pSizeInfo) { RADV_FROM_HANDLE(radv_device, device, _device); @@ -265,20 +263,18 @@ radv_GetAccelerationStructureBuildSizesKHR( } VKAPI_ATTR VkResult VKAPI_CALL -radv_WriteAccelerationStructuresPropertiesKHR( - VkDevice _device, uint32_t accelerationStructureCount, - const VkAccelerationStructureKHR *pAccelerationStructures, VkQueryType queryType, - size_t dataSize, void *pData, size_t stride) +radv_WriteAccelerationStructuresPropertiesKHR(VkDevice _device, uint32_t accelerationStructureCount, + const VkAccelerationStructureKHR *pAccelerationStructures, + VkQueryType queryType, size_t dataSize, void *pData, size_t stride) { unreachable("Unimplemented"); return VK_ERROR_FEATURE_NOT_PRESENT; } VKAPI_ATTR VkResult VKAPI_CALL -radv_BuildAccelerationStructuresKHR( - 
VkDevice _device, VkDeferredOperationKHR deferredOperation, uint32_t infoCount, - const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, - const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos) +radv_BuildAccelerationStructuresKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation, uint32_t infoCount, + const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, + const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos) { unreachable("Unimplemented"); return VK_ERROR_FEATURE_NOT_PRESENT; @@ -296,59 +292,41 @@ void radv_device_finish_accel_struct_build_state(struct radv_device *device) { struct radv_meta_state *state = &device->meta_state; - radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.copy_pipeline, - &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.ploc_pipeline, - &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->accel_struct_build.ploc_extended_pipeline, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->accel_struct_build.lbvh_generate_ir_pipeline, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.lbvh_main_pipeline, - &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.leaf_pipeline, - &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.encode_pipeline, + radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.copy_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.ploc_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.ploc_extended_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.lbvh_generate_ir_pipeline, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->accel_struct_build.encode_compact_pipeline, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.header_pipeline, + radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.lbvh_main_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.leaf_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.encode_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.encode_compact_pipeline, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.morton_pipeline, - &state->alloc); - radv_DestroyPipelineLayout(radv_device_to_handle(device), - state->accel_struct_build.copy_p_layout, &state->alloc); - radv_DestroyPipelineLayout(radv_device_to_handle(device), - state->accel_struct_build.ploc_p_layout, &state->alloc); - radv_DestroyPipelineLayout(radv_device_to_handle(device), - state->accel_struct_build.lbvh_generate_ir_p_layout, &state->alloc); - radv_DestroyPipelineLayout(radv_device_to_handle(device), - state->accel_struct_build.lbvh_main_p_layout, &state->alloc); - radv_DestroyPipelineLayout(radv_device_to_handle(device), - state->accel_struct_build.leaf_p_layout, &state->alloc); - radv_DestroyPipelineLayout(radv_device_to_handle(device), - state->accel_struct_build.encode_p_layout, &state->alloc); - radv_DestroyPipelineLayout(radv_device_to_handle(device), - 
state->accel_struct_build.header_p_layout, &state->alloc); - radv_DestroyPipelineLayout(radv_device_to_handle(device), - state->accel_struct_build.morton_p_layout, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.header_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.morton_pipeline, &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.copy_p_layout, &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.ploc_p_layout, &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.lbvh_generate_ir_p_layout, + &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.lbvh_main_p_layout, + &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.leaf_p_layout, &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.encode_p_layout, &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.header_p_layout, &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.morton_p_layout, &state->alloc); if (state->accel_struct_build.radix_sort) - radix_sort_vk_destroy(state->accel_struct_build.radix_sort, radv_device_to_handle(device), - &state->alloc); - - radv_DestroyBuffer(radv_device_to_handle(device), state->accel_struct_build.null.buffer, - &state->alloc); - radv_FreeMemory(radv_device_to_handle(device), state->accel_struct_build.null.memory, - &state->alloc); - vk_common_DestroyAccelerationStructureKHR( - radv_device_to_handle(device), state->accel_struct_build.null.accel_struct, &state->alloc); + radix_sort_vk_destroy(state->accel_struct_build.radix_sort, radv_device_to_handle(device), &state->alloc); + + radv_DestroyBuffer(radv_device_to_handle(device), state->accel_struct_build.null.buffer, &state->alloc); + radv_FreeMemory(radv_device_to_handle(device), state->accel_struct_build.null.memory, &state->alloc); + vk_common_DestroyAccelerationStructureKHR(radv_device_to_handle(device), state->accel_struct_build.null.accel_struct, + &state->alloc); } static VkResult create_build_pipeline_spv(struct radv_device *device, const uint32_t *spv, uint32_t spv_size, - unsigned push_constant_size, VkPipeline *pipeline, - VkPipelineLayout *layout) + unsigned push_constant_size, VkPipeline *pipeline, VkPipelineLayout *layout) { if (*pipeline) return VK_SUCCESS; @@ -357,8 +335,7 @@ create_build_pipeline_spv(struct radv_device *device, const uint32_t *spv, uint3 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .setLayoutCount = 0, .pushConstantRangeCount = 1, - .pPushConstantRanges = - &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constant_size}, + .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constant_size}, }; VkShaderModuleCreateInfo module_info = { @@ -370,14 +347,14 @@ create_build_pipeline_spv(struct radv_device *device, const uint32_t *spv, uint3 }; VkShaderModule module; - VkResult result = device->vk.dispatch_table.CreateShaderModule( - radv_device_to_handle(device), &module_info, &device->meta_state.alloc, &module); + VkResult result = device->vk.dispatch_table.CreateShaderModule(radv_device_to_handle(device), &module_info, + &device->meta_state.alloc, &module); if (result != VK_SUCCESS) 
return result; if (!*layout) { - result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, - &device->meta_state.alloc, layout); + result = + radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, layout); if (result != VK_SUCCESS) goto cleanup; } @@ -397,26 +374,23 @@ create_build_pipeline_spv(struct radv_device *device, const uint32_t *spv, uint3 .layout = *layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &pipeline_info, &device->meta_state.alloc, pipeline); + result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &pipeline_info, + &device->meta_state.alloc, pipeline); cleanup: - device->vk.dispatch_table.DestroyShaderModule(radv_device_to_handle(device), module, - &device->meta_state.alloc); + device->vk.dispatch_table.DestroyShaderModule(radv_device_to_handle(device), module, &device->meta_state.alloc); return result; } static void -radix_sort_fill_buffer(VkCommandBuffer commandBuffer, - radix_sort_vk_buffer_info_t const *buffer_info, VkDeviceSize offset, - VkDeviceSize size, uint32_t data) +radix_sort_fill_buffer(VkCommandBuffer commandBuffer, radix_sort_vk_buffer_info_t const *buffer_info, + VkDeviceSize offset, VkDeviceSize size, uint32_t data) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); assert(size != VK_WHOLE_SIZE); - radv_fill_buffer(cmd_buffer, NULL, NULL, buffer_info->devaddr + buffer_info->offset + offset, - size, data); + radv_fill_buffer(cmd_buffer, NULL, NULL, buffer_info->devaddr + buffer_info->offset + offset, size, data); } VkResult @@ -459,10 +433,9 @@ radv_device_init_null_accel_struct(struct radv_device *device) VkMemoryAllocateInfo alloc_info = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize = mem_req.memoryRequirements.size, - .memoryTypeIndex = - radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT), + .memoryTypeIndex = radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT), }; result = radv_AllocateMemory(_device, &alloc_info, &device->meta_state.alloc, &memory); @@ -521,8 +494,7 @@ radv_device_init_null_accel_struct(struct radv_device *device) .type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR, }; - result = vk_common_CreateAccelerationStructureKHR(_device, &create_info, - &device->meta_state.alloc, &accel_struct); + result = vk_common_CreateAccelerationStructureKHR(_device, &create_info, &device->meta_state.alloc, &accel_struct); if (result != VK_SUCCESS) return result; @@ -548,18 +520,16 @@ radv_device_init_accel_struct_build_state(struct radv_device *device) if (result != VK_SUCCESS) goto exit; - result = create_build_pipeline_spv(device, lbvh_main_spv, sizeof(lbvh_main_spv), - sizeof(struct lbvh_main_args), + result = create_build_pipeline_spv(device, lbvh_main_spv, sizeof(lbvh_main_spv), sizeof(struct lbvh_main_args), &device->meta_state.accel_struct_build.lbvh_main_pipeline, &device->meta_state.accel_struct_build.lbvh_main_p_layout); if (result != VK_SUCCESS) goto exit; - result = - create_build_pipeline_spv(device, lbvh_generate_ir_spv, sizeof(lbvh_generate_ir_spv), - sizeof(struct lbvh_generate_ir_args), - &device->meta_state.accel_struct_build.lbvh_generate_ir_pipeline, - 
&device->meta_state.accel_struct_build.lbvh_generate_ir_p_layout); + result = create_build_pipeline_spv(device, lbvh_generate_ir_spv, sizeof(lbvh_generate_ir_spv), + sizeof(struct lbvh_generate_ir_args), + &device->meta_state.accel_struct_build.lbvh_generate_ir_pipeline, + &device->meta_state.accel_struct_build.lbvh_generate_ir_p_layout); if (result != VK_SUCCESS) goto exit; @@ -569,47 +539,41 @@ radv_device_init_accel_struct_build_state(struct radv_device *device) if (result != VK_SUCCESS) goto exit; - result = create_build_pipeline_spv(device, ploc_extended_spv, sizeof(ploc_extended_spv), - sizeof(struct ploc_args), + result = create_build_pipeline_spv(device, ploc_extended_spv, sizeof(ploc_extended_spv), sizeof(struct ploc_args), &device->meta_state.accel_struct_build.ploc_extended_pipeline, &device->meta_state.accel_struct_build.ploc_p_layout); if (result != VK_SUCCESS) goto exit; - result = - create_build_pipeline_spv(device, encode_spv, sizeof(encode_spv), sizeof(struct encode_args), - &device->meta_state.accel_struct_build.encode_pipeline, - &device->meta_state.accel_struct_build.encode_p_layout); + result = create_build_pipeline_spv(device, encode_spv, sizeof(encode_spv), sizeof(struct encode_args), + &device->meta_state.accel_struct_build.encode_pipeline, + &device->meta_state.accel_struct_build.encode_p_layout); if (result != VK_SUCCESS) goto exit; - result = create_build_pipeline_spv( - device, encode_compact_spv, sizeof(encode_compact_spv), sizeof(struct encode_args), - &device->meta_state.accel_struct_build.encode_compact_pipeline, - &device->meta_state.accel_struct_build.encode_p_layout); + result = + create_build_pipeline_spv(device, encode_compact_spv, sizeof(encode_compact_spv), sizeof(struct encode_args), + &device->meta_state.accel_struct_build.encode_compact_pipeline, + &device->meta_state.accel_struct_build.encode_p_layout); if (result != VK_SUCCESS) goto exit; - result = - create_build_pipeline_spv(device, header_spv, sizeof(header_spv), sizeof(struct header_args), - &device->meta_state.accel_struct_build.header_pipeline, - &device->meta_state.accel_struct_build.header_p_layout); + result = create_build_pipeline_spv(device, header_spv, sizeof(header_spv), sizeof(struct header_args), + &device->meta_state.accel_struct_build.header_pipeline, + &device->meta_state.accel_struct_build.header_p_layout); if (result != VK_SUCCESS) goto exit; - result = - create_build_pipeline_spv(device, morton_spv, sizeof(morton_spv), sizeof(struct morton_args), - &device->meta_state.accel_struct_build.morton_pipeline, - &device->meta_state.accel_struct_build.morton_p_layout); + result = create_build_pipeline_spv(device, morton_spv, sizeof(morton_spv), sizeof(struct morton_args), + &device->meta_state.accel_struct_build.morton_pipeline, + &device->meta_state.accel_struct_build.morton_p_layout); if (result != VK_SUCCESS) goto exit; device->meta_state.accel_struct_build.radix_sort = - radv_create_radix_sort_u64(radv_device_to_handle(device), &device->meta_state.alloc, - device->meta_state.cache); + radv_create_radix_sort_u64(radv_device_to_handle(device), &device->meta_state.alloc, device->meta_state.cache); - struct radix_sort_vk_sort_devaddr_info *radix_sort_info = - &device->meta_state.accel_struct_build.radix_sort_info; + struct radix_sort_vk_sort_devaddr_info *radix_sort_info = &device->meta_state.accel_struct_build.radix_sort_info; radix_sort_info->ext = NULL; radix_sort_info->key_bits = 24; radix_sort_info->fill_buffer = radix_sort_fill_buffer; @@ -624,10 +588,9 @@ 
radv_device_init_accel_struct_copy_state(struct radv_device *device) { mtx_lock(&device->meta_state.mtx); - VkResult result = - create_build_pipeline_spv(device, copy_spv, sizeof(copy_spv), sizeof(struct copy_args), - &device->meta_state.accel_struct_build.copy_pipeline, - &device->meta_state.accel_struct_build.copy_p_layout); + VkResult result = create_build_pipeline_spv(device, copy_spv, sizeof(copy_spv), sizeof(struct copy_args), + &device->meta_state.accel_struct_build.copy_pipeline, + &device->meta_state.accel_struct_build.copy_p_layout); mtx_unlock(&device->meta_state.mtx); return result; @@ -660,8 +623,8 @@ pack_geometry_id_and_flags(uint32_t geometry_id, uint32_t flags) static void build_leaves(VkCommandBuffer commandBuffer, uint32_t infoCount, const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, - const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos, - struct bvh_state *bvh_states, enum radv_cmd_flush_bits flush_bits) + const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos, struct bvh_state *bvh_states, + enum radv_cmd_flush_bits flush_bits) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, @@ -711,8 +674,7 @@ build_leaves(VkCommandBuffer commandBuffer, uint32_t infoCount, case VK_GEOMETRY_TYPE_AABBS_KHR: assert(pInfos[i].type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR); - leaf_consts.data = - geom->geometry.aabbs.data.deviceAddress + buildRangeInfo->primitiveOffset; + leaf_consts.data = geom->geometry.aabbs.data.deviceAddress + buildRangeInfo->primitiveOffset; leaf_consts.stride = geom->geometry.aabbs.stride; prim_size = sizeof(struct radv_ir_aabb_node); @@ -720,8 +682,7 @@ build_leaves(VkCommandBuffer commandBuffer, uint32_t infoCount, case VK_GEOMETRY_TYPE_INSTANCES_KHR: assert(pInfos[i].type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR); - leaf_consts.data = - geom->geometry.instances.data.deviceAddress + buildRangeInfo->primitiveOffset; + leaf_consts.data = geom->geometry.instances.data.deviceAddress + buildRangeInfo->primitiveOffset; if (geom->geometry.instances.arrayOfPointers) leaf_consts.stride = 8; @@ -734,8 +695,7 @@ build_leaves(VkCommandBuffer commandBuffer, uint32_t infoCount, unreachable("Unknown geometryType"); } - radv_CmdPushConstants(commandBuffer, - cmd_buffer->device->meta_state.accel_struct_build.leaf_p_layout, + radv_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.leaf_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(leaf_consts), &leaf_consts); radv_unaligned_dispatch(cmd_buffer, buildRangeInfo->primitiveCount, 1, 1); @@ -752,8 +712,8 @@ build_leaves(VkCommandBuffer commandBuffer, uint32_t infoCount, static void morton_generate(VkCommandBuffer commandBuffer, uint32_t infoCount, - const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, - struct bvh_state *bvh_states, enum radv_cmd_flush_bits flush_bits) + const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states, + enum radv_cmd_flush_bits flush_bits) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, @@ -766,8 +726,7 @@ morton_generate(VkCommandBuffer commandBuffer, uint32_t infoCount, .ids = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0], }; - radv_CmdPushConstants(commandBuffer, - cmd_buffer->device->meta_state.accel_struct_build.morton_p_layout, + radv_CmdPushConstants(commandBuffer, 
cmd_buffer->device->meta_state.accel_struct_build.morton_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts); radv_unaligned_dispatch(cmd_buffer, bvh_states[i].node_count, 1, 1); } @@ -783,31 +742,25 @@ morton_sort(VkCommandBuffer commandBuffer, uint32_t infoCount, RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); for (uint32_t i = 0; i < infoCount; ++i) { struct radix_sort_vk_memory_requirements requirements; - radix_sort_vk_get_memory_requirements( - cmd_buffer->device->meta_state.accel_struct_build.radix_sort, bvh_states[i].node_count, - &requirements); + radix_sort_vk_get_memory_requirements(cmd_buffer->device->meta_state.accel_struct_build.radix_sort, + bvh_states[i].node_count, &requirements); - struct radix_sort_vk_sort_devaddr_info info = - cmd_buffer->device->meta_state.accel_struct_build.radix_sort_info; + struct radix_sort_vk_sort_devaddr_info info = cmd_buffer->device->meta_state.accel_struct_build.radix_sort_info; info.count = bvh_states[i].node_count; info.keyvals_even.buffer = VK_NULL_HANDLE; info.keyvals_even.offset = 0; - info.keyvals_even.devaddr = - pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0]; + info.keyvals_even.devaddr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0]; - info.keyvals_odd = - pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[1]; + info.keyvals_odd = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[1]; info.internal.buffer = VK_NULL_HANDLE; info.internal.offset = 0; - info.internal.devaddr = - pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset; + info.internal.devaddr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset; VkDeviceAddress result_addr; - radix_sort_vk_sort_devaddr(cmd_buffer->device->meta_state.accel_struct_build.radix_sort, - &info, radv_device_to_handle(cmd_buffer->device), commandBuffer, - &result_addr); + radix_sort_vk_sort_devaddr(cmd_buffer->device->meta_state.accel_struct_build.radix_sort, &info, + radv_device_to_handle(cmd_buffer->device), commandBuffer, &result_addr); assert(result_addr == info.keyvals_even.devaddr || result_addr == info.keyvals_odd); @@ -819,8 +772,8 @@ morton_sort(VkCommandBuffer commandBuffer, uint32_t infoCount, static void lbvh_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount, - const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, - struct bvh_state *bvh_states, enum radv_cmd_flush_bits flush_bits) + const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states, + enum radv_cmd_flush_bits flush_bits) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, @@ -840,8 +793,7 @@ lbvh_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount, .internal_node_base = bvh_states[i].internal_node_base, }; - radv_CmdPushConstants(commandBuffer, - cmd_buffer->device->meta_state.accel_struct_build.lbvh_main_p_layout, + radv_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.lbvh_main_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts); radv_unaligned_dispatch(cmd_buffer, internal_node_count, 1, 1); bvh_states[i].node_count = internal_node_count; @@ -850,9 +802,8 @@ lbvh_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount, cmd_buffer->state.flush_bits |= flush_bits; - radv_CmdBindPipeline( - commandBuffer, 
VK_PIPELINE_BIND_POINT_COMPUTE, - cmd_buffer->device->meta_state.accel_struct_build.lbvh_generate_ir_pipeline); + radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, + cmd_buffer->device->meta_state.accel_struct_build.lbvh_generate_ir_pipeline); for (uint32_t i = 0; i < infoCount; ++i) { if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_LBVH) @@ -865,23 +816,21 @@ lbvh_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount, .internal_node_base = bvh_states[i].internal_node_base, }; - radv_CmdPushConstants( - commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.lbvh_generate_ir_p_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts); + radv_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.lbvh_generate_ir_p_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts); radv_unaligned_dispatch(cmd_buffer, bvh_states[i].internal_node_count, 1, 1); } } static void ploc_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount, - const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, - struct bvh_state *bvh_states, bool extended_sah) + const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states, + bool extended_sah) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - radv_CmdBindPipeline( - commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, - extended_sah ? cmd_buffer->device->meta_state.accel_struct_build.ploc_extended_pipeline - : cmd_buffer->device->meta_state.accel_struct_build.ploc_pipeline); + radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, + extended_sah ? cmd_buffer->device->meta_state.accel_struct_build.ploc_extended_pipeline + : cmd_buffer->device->meta_state.accel_struct_build.ploc_pipeline); for (uint32_t i = 0; i < infoCount; ++i) { if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_PLOC) @@ -895,45 +844,39 @@ ploc_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount, .task_counts = {TASK_INDEX_INVALID, TASK_INDEX_INVALID}, }; radv_update_buffer_cp(cmd_buffer, - pInfos[i].scratchData.deviceAddress + - bvh_states[i].scratch.header_offset + + pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset + offsetof(struct radv_ir_header, sync_data), &initial_sync_data, sizeof(struct radv_global_sync_data)); uint32_t src_scratch_offset = bvh_states[i].scratch_offset; - uint32_t dst_scratch_offset = - (src_scratch_offset == bvh_states[i].scratch.sort_buffer_offset[0]) - ? bvh_states[i].scratch.sort_buffer_offset[1] - : bvh_states[i].scratch.sort_buffer_offset[0]; + uint32_t dst_scratch_offset = (src_scratch_offset == bvh_states[i].scratch.sort_buffer_offset[0]) + ? 
bvh_states[i].scratch.sort_buffer_offset[1] + : bvh_states[i].scratch.sort_buffer_offset[0]; const struct ploc_args consts = { .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset, .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset, .ids_0 = pInfos[i].scratchData.deviceAddress + src_scratch_offset, .ids_1 = pInfos[i].scratchData.deviceAddress + dst_scratch_offset, - .prefix_scan_partitions = pInfos[i].scratchData.deviceAddress + - bvh_states[i].scratch.ploc_prefix_sum_partition_offset, + .prefix_scan_partitions = + pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ploc_prefix_sum_partition_offset, .internal_node_offset = bvh_states[i].internal_node_base, }; - radv_CmdPushConstants(commandBuffer, - cmd_buffer->device->meta_state.accel_struct_build.ploc_p_layout, + radv_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.ploc_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts); - vk_common_CmdDispatch(commandBuffer, - MAX2(DIV_ROUND_UP(bvh_states[i].node_count, PLOC_WORKGROUP_SIZE), 1), 1, 1); + vk_common_CmdDispatch(commandBuffer, MAX2(DIV_ROUND_UP(bvh_states[i].node_count, PLOC_WORKGROUP_SIZE), 1), 1, 1); } } static void encode_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount, - const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, - struct bvh_state *bvh_states, bool compact) + const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states, bool compact) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - radv_CmdBindPipeline( - commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, - compact ? cmd_buffer->device->meta_state.accel_struct_build.encode_compact_pipeline - : cmd_buffer->device->meta_state.accel_struct_build.encode_pipeline); + radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, + compact ? cmd_buffer->device->meta_state.accel_struct_build.encode_compact_pipeline + : cmd_buffer->device->meta_state.accel_struct_build.encode_pipeline); for (uint32_t i = 0; i < infoCount; ++i) { if (compact != bvh_states[i].config.compact) @@ -947,8 +890,8 @@ encode_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount, * because it will be multiplied with 0. */ if (pInfos[i].geometryCount) - geometry_type = pInfos[i].pGeometries ? pInfos[i].pGeometries[0].geometryType - : pInfos[i].ppGeometries[0]->geometryType; + geometry_type = + pInfos[i].pGeometries ? 
pInfos[i].pGeometries[0].geometryType : pInfos[i].ppGeometries[0]->geometryType; if (bvh_states[i].config.compact) { uint32_t leaf_node_size = 0; @@ -966,26 +909,22 @@ encode_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount, unreachable(""); } - uint32_t dst_offset = - sizeof(struct radv_bvh_box32_node) + bvh_states[i].leaf_node_count * leaf_node_size; + uint32_t dst_offset = sizeof(struct radv_bvh_box32_node) + bvh_states[i].leaf_node_count * leaf_node_size; radv_update_buffer_cp(cmd_buffer, - pInfos[i].scratchData.deviceAddress + - bvh_states[i].scratch.header_offset + + pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset + offsetof(struct radv_ir_header, dst_node_offset), &dst_offset, sizeof(uint32_t)); } const struct encode_args args = { .intermediate_bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset, - .output_bvh = - vk_acceleration_structure_get_va(accel_struct) + bvh_states[i].accel_struct.bvh_offset, + .output_bvh = vk_acceleration_structure_get_va(accel_struct) + bvh_states[i].accel_struct.bvh_offset, .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset, .output_bvh_offset = bvh_states[i].accel_struct.bvh_offset, .leaf_node_count = bvh_states[i].leaf_node_count, .geometry_type = geometry_type, }; - radv_CmdPushConstants(commandBuffer, - cmd_buffer->device->meta_state.accel_struct_build.encode_p_layout, + radv_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.encode_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args); struct radv_dispatch_info dispatch = { @@ -1012,9 +951,8 @@ init_header(VkCommandBuffer commandBuffer, uint32_t infoCount, RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure); size_t base = offsetof(struct radv_accel_struct_header, compacted_size); - uint64_t instance_count = pInfos[i].type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR - ? bvh_states[i].leaf_node_count - : 0; + uint64_t instance_count = + pInfos[i].type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR ? 
bvh_states[i].leaf_node_count : 0; if (bvh_states[i].config.compact) { base = offsetof(struct radv_accel_struct_header, geometry_count); @@ -1026,8 +964,7 @@ init_header(VkCommandBuffer commandBuffer, uint32_t infoCount, .instance_count = instance_count, }; - radv_CmdPushConstants(commandBuffer, - cmd_buffer->device->meta_state.accel_struct_build.header_p_layout, + radv_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.header_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args); radv_unaligned_dispatch(cmd_buffer, 1, 1, 1); @@ -1035,8 +972,7 @@ init_header(VkCommandBuffer commandBuffer, uint32_t infoCount, struct radv_accel_struct_header header; - header.instance_offset = - bvh_states[i].accel_struct.bvh_offset + sizeof(struct radv_bvh_box32_node); + header.instance_offset = bvh_states[i].accel_struct.bvh_offset + sizeof(struct radv_bvh_box32_node); header.instance_count = instance_count; header.compacted_size = bvh_states[i].accel_struct.size; @@ -1045,12 +981,10 @@ init_header(VkCommandBuffer commandBuffer, uint32_t infoCount, header.copy_dispatch_size[2] = 1; header.serialization_size = - header.compacted_size + align(sizeof(struct radv_accel_struct_serialization_header) + - sizeof(uint64_t) * header.instance_count, - 128); + header.compacted_size + + align(sizeof(struct radv_accel_struct_serialization_header) + sizeof(uint64_t) * header.instance_count, 128); - header.size = header.serialization_size - - sizeof(struct radv_accel_struct_serialization_header) - + header.size = header.serialization_size - sizeof(struct radv_accel_struct_serialization_header) - sizeof(uint64_t) * header.instance_count; header.build_flags = pInfos[i].flags; @@ -1063,15 +997,13 @@ init_header(VkCommandBuffer commandBuffer, uint32_t infoCount, static void init_geometry_infos(VkCommandBuffer commandBuffer, uint32_t infoCount, - const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, - struct bvh_state *bvh_states, + const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states, const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos) { for (uint32_t i = 0; i < infoCount; ++i) { RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure); - uint64_t geometry_infos_size = - pInfos[i].geometryCount * sizeof(struct radv_accel_struct_geometry_info); + uint64_t geometry_infos_size = pInfos[i].geometryCount * sizeof(struct radv_accel_struct_geometry_info); struct radv_accel_struct_geometry_info *geometry_infos = malloc(geometry_infos_size); if (!geometry_infos) @@ -1086,18 +1018,17 @@ init_geometry_infos(VkCommandBuffer commandBuffer, uint32_t infoCount, } radv_CmdUpdateBuffer(commandBuffer, accel_struct->buffer, - accel_struct->offset + bvh_states[i].accel_struct.geometry_info_offset, - geometry_infos_size, geometry_infos); + accel_struct->offset + bvh_states[i].accel_struct.geometry_info_offset, geometry_infos_size, + geometry_infos); free(geometry_infos); } } VKAPI_ATTR void VKAPI_CALL -radv_CmdBuildAccelerationStructuresKHR( - VkCommandBuffer commandBuffer, uint32_t infoCount, - const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, - const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos) +radv_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t infoCount, + const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, + const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, 
commandBuffer); struct radv_meta_saved_state saved_state; @@ -1110,14 +1041,11 @@ radv_CmdBuildAccelerationStructuresKHR( enum radv_cmd_flush_bits flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH | - radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT, - NULL) | - radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT, - NULL); - - radv_meta_save( - &saved_state, cmd_buffer, - RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS); + radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT, NULL) | + radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT, NULL); + + radv_meta_save(&saved_state, cmd_buffer, + RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS); struct bvh_state *bvh_states = calloc(infoCount, sizeof(struct bvh_state)); for (uint32_t i = 0; i < infoCount; ++i) { @@ -1139,9 +1067,8 @@ radv_CmdBuildAccelerationStructuresKHR( .dispatch_size_z = 1, }; - radv_update_buffer_cp( - cmd_buffer, pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset, - &header, sizeof(header)); + radv_update_buffer_cp(cmd_buffer, pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset, + &header, sizeof(header)); } cmd_buffer->state.flush_bits |= flush_bits; @@ -1176,8 +1103,7 @@ radv_CmdBuildAccelerationStructuresKHR( } VKAPI_ATTR void VKAPI_CALL -radv_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, - const VkCopyAccelerationStructureInfoKHR *pInfo) +radv_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src); @@ -1191,9 +1117,8 @@ radv_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, return; } - radv_meta_save( - &saved_state, cmd_buffer, - RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS); + radv_meta_save(&saved_state, cmd_buffer, + RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS); radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, cmd_buffer->device->meta_state.accel_struct_build.copy_pipeline); @@ -1205,35 +1130,32 @@ radv_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, }; radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts); + cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(consts), &consts); - cmd_buffer->state.flush_bits |= - radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL); + cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL); - radv_indirect_dispatch(cmd_buffer, src_buffer->bo, - vk_acceleration_structure_get_va(src) + - offsetof(struct radv_accel_struct_header, copy_dispatch_size)); + radv_indirect_dispatch( + cmd_buffer, src_buffer->bo, + vk_acceleration_structure_get_va(src) + offsetof(struct radv_accel_struct_header, copy_dispatch_size)); radv_meta_restore(&saved_state, cmd_buffer); } VKAPI_ATTR void VKAPI_CALL -radv_GetDeviceAccelerationStructureCompatibilityKHR( - VkDevice _device, const 
VkAccelerationStructureVersionInfoKHR *pVersionInfo, - VkAccelerationStructureCompatibilityKHR *pCompatibility) +radv_GetDeviceAccelerationStructureCompatibilityKHR(VkDevice _device, + const VkAccelerationStructureVersionInfoKHR *pVersionInfo, + VkAccelerationStructureCompatibilityKHR *pCompatibility) { RADV_FROM_HANDLE(radv_device, device, _device); bool compat = memcmp(pVersionInfo->pVersionData, device->physical_device->driver_uuid, VK_UUID_SIZE) == 0 && - memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, device->physical_device->cache_uuid, - VK_UUID_SIZE) == 0; + memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, device->physical_device->cache_uuid, VK_UUID_SIZE) == 0; *pCompatibility = compat ? VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR : VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR; } VKAPI_ATTR VkResult VKAPI_CALL -radv_CopyMemoryToAccelerationStructureKHR(VkDevice _device, - VkDeferredOperationKHR deferredOperation, +radv_CopyMemoryToAccelerationStructureKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation, const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo) { unreachable("Unimplemented"); @@ -1241,8 +1163,7 @@ radv_CopyMemoryToAccelerationStructureKHR(VkDevice _device, } VKAPI_ATTR VkResult VKAPI_CALL -radv_CopyAccelerationStructureToMemoryKHR(VkDevice _device, - VkDeferredOperationKHR deferredOperation, +radv_CopyAccelerationStructureToMemoryKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation, const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo) { unreachable("Unimplemented"); @@ -1250,8 +1171,8 @@ radv_CopyAccelerationStructureToMemoryKHR(VkDevice _device, } VKAPI_ATTR void VKAPI_CALL -radv_CmdCopyMemoryToAccelerationStructureKHR( - VkCommandBuffer commandBuffer, const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo) +radv_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer, + const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst); @@ -1263,9 +1184,8 @@ radv_CmdCopyMemoryToAccelerationStructureKHR( return; } - radv_meta_save( - &saved_state, cmd_buffer, - RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS); + radv_meta_save(&saved_state, cmd_buffer, + RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS); radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, cmd_buffer->device->meta_state.accel_struct_build.copy_pipeline); @@ -1277,16 +1197,16 @@ radv_CmdCopyMemoryToAccelerationStructureKHR( }; radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts); + cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(consts), &consts); vk_common_CmdDispatch(commandBuffer, 512, 1, 1); radv_meta_restore(&saved_state, cmd_buffer); } VKAPI_ATTR void VKAPI_CALL -radv_CmdCopyAccelerationStructureToMemoryKHR( - VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo) +radv_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer, + const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src); @@ -1299,9 +1219,8 @@ 
radv_CmdCopyAccelerationStructureToMemoryKHR( return; } - radv_meta_save( - &saved_state, cmd_buffer, - RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS); + radv_meta_save(&saved_state, cmd_buffer, + RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS); radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, cmd_buffer->device->meta_state.accel_struct_build.copy_pipeline); @@ -1313,32 +1232,30 @@ radv_CmdCopyAccelerationStructureToMemoryKHR( }; radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts); + cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(consts), &consts); - cmd_buffer->state.flush_bits |= - radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL); + cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL); - radv_indirect_dispatch(cmd_buffer, src_buffer->bo, - vk_acceleration_structure_get_va(src) + - offsetof(struct radv_accel_struct_header, copy_dispatch_size)); + radv_indirect_dispatch( + cmd_buffer, src_buffer->bo, + vk_acceleration_structure_get_va(src) + offsetof(struct radv_accel_struct_header, copy_dispatch_size)); radv_meta_restore(&saved_state, cmd_buffer); /* Set the header of the serialized data. */ uint8_t header_data[2 * VK_UUID_SIZE]; memcpy(header_data, cmd_buffer->device->physical_device->driver_uuid, VK_UUID_SIZE); - memcpy(header_data + VK_UUID_SIZE, cmd_buffer->device->physical_device->cache_uuid, - VK_UUID_SIZE); + memcpy(header_data + VK_UUID_SIZE, cmd_buffer->device->physical_device->cache_uuid, VK_UUID_SIZE); radv_update_buffer_cp(cmd_buffer, pInfo->dst.deviceAddress, header_data, sizeof(header_data)); } VKAPI_ATTR void VKAPI_CALL -radv_CmdBuildAccelerationStructuresIndirectKHR( - VkCommandBuffer commandBuffer, uint32_t infoCount, - const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, - const VkDeviceAddress *pIndirectDeviceAddresses, const uint32_t *pIndirectStrides, - const uint32_t *const *ppMaxPrimitiveCounts) +radv_CmdBuildAccelerationStructuresIndirectKHR(VkCommandBuffer commandBuffer, uint32_t infoCount, + const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, + const VkDeviceAddress *pIndirectDeviceAddresses, + const uint32_t *pIndirectStrides, + const uint32_t *const *ppMaxPrimitiveCounts) { unreachable("Unimplemented"); } diff --git a/src/amd/vulkan/radv_aco_shader_info.h b/src/amd/vulkan/radv_aco_shader_info.h index 88c9730..f97987d 100644 --- a/src/amd/vulkan/radv_aco_shader_info.h +++ b/src/amd/vulkan/radv_aco_shader_info.h @@ -31,7 +31,7 @@ #include "aco_shader_info.h" -#define ASSIGN_FIELD(x) aco_info->x = radv->x +#define ASSIGN_FIELD(x) aco_info->x = radv->x #define ASSIGN_FIELD_CP(x) memcpy(&aco_info->x, &radv->x, sizeof(radv->x)) static inline void radv_aco_convert_ps_epilog_key(struct aco_ps_epilog_info *aco_info, @@ -40,8 +40,7 @@ static inline void radv_aco_convert_ps_epilog_key(struct aco_ps_epilog_info *aco static inline void radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv_shader_info *radv, - const struct radv_shader_args *radv_args, - const struct radv_pipeline_key *radv_key) + const struct radv_shader_args *radv_args, const struct radv_pipeline_key *radv_key) { ASSIGN_FIELD(wave_size); ASSIGN_FIELD(is_ngg); @@ 
-67,11 +66,10 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv aco_info->ps.epilog_pc = radv_args->ps_epilog_pc; } -#define ASSIGN_VS_STATE_FIELD(x) aco_info->state.x = radv->state->x +#define ASSIGN_VS_STATE_FIELD(x) aco_info->state.x = radv->state->x #define ASSIGN_VS_STATE_FIELD_CP(x) memcpy(&aco_info->state.x, &radv->state->x, sizeof(radv->state->x)) static inline void -radv_aco_convert_vs_prolog_key(struct aco_vs_prolog_info *aco_info, - const struct radv_vs_prolog_key *radv, +radv_aco_convert_vs_prolog_key(struct aco_vs_prolog_info *aco_info, const struct radv_vs_prolog_key *radv, const struct radv_shader_args *radv_args) { ASSIGN_VS_STATE_FIELD(instance_rate_inputs); @@ -90,8 +88,7 @@ radv_aco_convert_vs_prolog_key(struct aco_vs_prolog_info *aco_info, } static inline void -radv_aco_convert_ps_epilog_key(struct aco_ps_epilog_info *aco_info, - const struct radv_ps_epilog_key *radv, +radv_aco_convert_ps_epilog_key(struct aco_ps_epilog_info *aco_info, const struct radv_ps_epilog_key *radv, const struct radv_shader_args *radv_args) { ASSIGN_FIELD(spi_shader_col_format); @@ -104,8 +101,7 @@ radv_aco_convert_ps_epilog_key(struct aco_ps_epilog_info *aco_info, } static inline void -radv_aco_convert_opts(struct aco_compiler_options *aco_info, - const struct radv_nir_compiler_options *radv, +radv_aco_convert_opts(struct aco_compiler_options *aco_info, const struct radv_nir_compiler_options *radv, const struct radv_shader_args *radv_args) { ASSIGN_FIELD(dump_shader); diff --git a/src/amd/vulkan/radv_android.c b/src/amd/vulkan/radv_android.c index 0a6e284..d440a0d 100644 --- a/src/amd/vulkan/radv_android.c +++ b/src/amd/vulkan/radv_android.c @@ -108,8 +108,8 @@ radv_hal_close(struct hw_device_t *dev) VkResult radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info, - const VkNativeBufferANDROID *gralloc_info, - const VkAllocationCallbacks *alloc, VkImage *out_image_h) + const VkNativeBufferANDROID *gralloc_info, const VkAllocationCallbacks *alloc, + VkImage *out_image_h) { RADV_FROM_HANDLE(radv_device, device, device_h); @@ -194,12 +194,10 @@ radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info, radv_image_override_offset_stride(device, image, 0, gralloc_info->stride); - VkBindImageMemoryInfo bind_info = { - .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO, - .image = image_h, - .memory = memory_h, - .memoryOffset = 0 - }; + VkBindImageMemoryInfo bind_info = {.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO, + .image = image_h, + .memory = memory_h, + .memoryOffset = 0}; radv_BindImageMemory2(device_h, 1, &bind_info); image->owned_memory = memory_h; @@ -214,8 +212,8 @@ fail_create_image: } VkResult -radv_GetSwapchainGrallocUsageANDROID(VkDevice device_h, VkFormat format, - VkImageUsageFlags imageUsage, int *grallocUsage) +radv_GetSwapchainGrallocUsageANDROID(VkDevice device_h, VkFormat format, VkImageUsageFlags imageUsage, + int *grallocUsage) { RADV_FROM_HANDLE(radv_device, device, device_h); struct radv_physical_device *phys_dev = device->physical_device; @@ -250,8 +248,7 @@ radv_GetSwapchainGrallocUsageANDROID(VkDevice device_h, VkFormat format, }; /* Check that requested format and usage are supported. 
*/ - result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h, &image_format_info, - &image_format_props); + result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h, &image_format_info, &image_format_props); if (result != VK_SUCCESS) { return vk_errorf(device, result, "radv_GetPhysicalDeviceImageFormatProperties2 failed " @@ -262,8 +259,8 @@ radv_GetSwapchainGrallocUsageANDROID(VkDevice device_h, VkFormat format, if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) *grallocUsage |= GRALLOC_USAGE_HW_RENDER; - if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) + if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) *grallocUsage |= GRALLOC_USAGE_HW_TEXTURE; /* All VkImageUsageFlags not explicitly checked here are unsupported for @@ -282,8 +279,7 @@ radv_GetSwapchainGrallocUsageANDROID(VkDevice device_h, VkFormat format, * what we need for 30-bit colors. */ if (format == VK_FORMAT_B8G8R8A8_UNORM || format == VK_FORMAT_B5G6R5_UNORM_PACK16) { - *grallocUsage |= - GRALLOC_USAGE_HW_FB | GRALLOC_USAGE_HW_COMPOSER | GRALLOC_USAGE_EXTERNAL_DISP; + *grallocUsage |= GRALLOC_USAGE_HW_FB | GRALLOC_USAGE_HW_COMPOSER | GRALLOC_USAGE_EXTERNAL_DISP; } if (*grallocUsage == 0) @@ -293,11 +289,9 @@ radv_GetSwapchainGrallocUsageANDROID(VkDevice device_h, VkFormat format, } VkResult -radv_GetSwapchainGrallocUsage2ANDROID(VkDevice device_h, VkFormat format, - VkImageUsageFlags imageUsage, +radv_GetSwapchainGrallocUsage2ANDROID(VkDevice device_h, VkFormat format, VkImageUsageFlags imageUsage, VkSwapchainImageUsageFlagsANDROID swapchainImageUsage, - uint64_t *grallocConsumerUsage, - uint64_t *grallocProducerUsage) + uint64_t *grallocConsumerUsage, uint64_t *grallocProducerUsage) { /* Before level 26 (Android 8.0/Oreo) the loader uses * vkGetSwapchainGrallocUsageANDROID. */ @@ -327,8 +321,7 @@ radv_GetSwapchainGrallocUsage2ANDROID(VkDevice device_h, VkFormat format, }; /* Check that requested format and usage are supported. */ - result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h, &image_format_info, - &image_format_props); + result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h, &image_format_info, &image_format_props); if (result != VK_SUCCESS) { return vk_errorf(device, result, "radv_GetPhysicalDeviceImageFormatProperties2 failed " @@ -336,14 +329,13 @@ radv_GetSwapchainGrallocUsage2ANDROID(VkDevice device_h, VkFormat format, __func__); } - if (unmask32(&imageUsage, - VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) { + if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) { *grallocProducerUsage |= GRALLOC1_PRODUCER_USAGE_GPU_RENDER_TARGET; *grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_CLIENT_TARGET; } - if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) { + if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) { *grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_GPU_TEXTURE; } @@ -421,8 +413,7 @@ get_ahb_buffer_format_properties(VkDevice device_h, const struct AHardwareBuffer AHardwareBuffer_describe(buffer, &desc); /* Verify description. 
*/ - const uint64_t gpu_usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE | - AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT | + const uint64_t gpu_usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE | AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT | AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; /* "Buffer must be a valid Android hardware buffer object with at least @@ -437,12 +428,10 @@ get_ahb_buffer_format_properties(VkDevice device_h, const struct AHardwareBuffer p->format = vk_format_from_android(desc.format, desc.usage); p->externalFormat = (uint64_t)(uintptr_t)p->format; - VkFormatProperties2 format_properties = { - .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2 - }; + VkFormatProperties2 format_properties = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2}; - radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(device->physical_device), - p->format, &format_properties); + radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(device->physical_device), p->format, + &format_properties); if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER) p->formatFeatures = format_properties.formatProperties.linearTilingFeatures; @@ -497,8 +486,7 @@ get_ahb_buffer_format_properties2(VkDevice device_h, const struct AHardwareBuffe AHardwareBuffer_describe(buffer, &desc); /* Verify description. */ - const uint64_t gpu_usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE | - AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT | + const uint64_t gpu_usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE | AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT | AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; /* "Buffer must be a valid Android hardware buffer object with at least @@ -513,12 +501,10 @@ get_ahb_buffer_format_properties2(VkDevice device_h, const struct AHardwareBuffe p->format = vk_format_from_android(desc.format, desc.usage); p->externalFormat = (uint64_t)(uintptr_t)p->format; - VkFormatProperties2 format_properties = { - .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2 - }; + VkFormatProperties2 format_properties = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2}; - radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(device->physical_device), - p->format, &format_properties); + radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(device->physical_device), p->format, + &format_properties); if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER) p->formatFeatures = format_properties.formatProperties.linearTilingFeatures; @@ -563,8 +549,7 @@ get_ahb_buffer_format_properties2(VkDevice device_h, const struct AHardwareBuffe } VkResult -radv_GetAndroidHardwareBufferPropertiesANDROID(VkDevice device_h, - const struct AHardwareBuffer *buffer, +radv_GetAndroidHardwareBufferPropertiesANDROID(VkDevice device_h, const struct AHardwareBuffer *buffer, VkAndroidHardwareBufferPropertiesANDROID *pProperties) { RADV_FROM_HANDLE(radv_device, dev, device_h); @@ -602,8 +587,7 @@ radv_GetAndroidHardwareBufferPropertiesANDROID(VkDevice device_h, } VkResult -radv_GetMemoryAndroidHardwareBufferANDROID(VkDevice device_h, - const VkMemoryGetAndroidHardwareBufferInfoANDROID *pInfo, +radv_GetMemoryAndroidHardwareBufferANDROID(VkDevice device_h, const VkMemoryGetAndroidHardwareBufferInfoANDROID *pInfo, struct AHardwareBuffer **pBuffer) { RADV_FROM_HANDLE(radv_device_memory, mem, pInfo->memory); @@ -634,8 +618,7 @@ VkFormat radv_select_android_external_format(const void *next, VkFormat default_format) { #if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER - const VkExternalFormatANDROID *android_format = - vk_find_struct_const(next, 
EXTERNAL_FORMAT_ANDROID); + const VkExternalFormatANDROID *android_format = vk_find_struct_const(next, EXTERNAL_FORMAT_ANDROID); if (android_format && android_format->externalFormat) { return (VkFormat)android_format->externalFormat; @@ -646,8 +629,8 @@ radv_select_android_external_format(const void *next, VkFormat default_format) } VkResult -radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem, - unsigned priority, const VkImportAndroidHardwareBufferInfoANDROID *info) +radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem, unsigned priority, + const VkImportAndroidHardwareBufferInfoANDROID *info) { #if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER /* Import from AHardwareBuffer to radv_device_memory. */ @@ -663,8 +646,7 @@ radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *me return VK_ERROR_INVALID_EXTERNAL_HANDLE; uint64_t alloc_size = 0; - VkResult result = - device->ws->buffer_from_fd(device->ws, dma_buf, priority, &mem->bo, &alloc_size); + VkResult result = device->ws->buffer_from_fd(device->ws, dma_buf, priority, &mem->bo, &alloc_size); if (result != VK_SUCCESS) return result; @@ -672,8 +654,7 @@ radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *me struct radeon_bo_metadata metadata; device->ws->buffer_get_metadata(device->ws, mem->bo, &metadata); - struct radv_image_create_info create_info = {.no_metadata_planes = true, - .bo_metadata = &metadata}; + struct radv_image_create_info create_info = {.no_metadata_planes = true, .bo_metadata = &metadata}; result = radv_image_create_layout(device, create_info, NULL, NULL, mem->image); if (result != VK_SUCCESS) { @@ -710,8 +691,8 @@ radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *me } VkResult -radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem, - unsigned priority, const VkMemoryAllocateInfo *pAllocateInfo) +radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem, unsigned priority, + const VkMemoryAllocateInfo *pAllocateInfo) { #if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER mem->android_hardware_buffer = vk_alloc_ahardware_buffer(pAllocateInfo); diff --git a/src/amd/vulkan/radv_buffer.c b/src/amd/vulkan/radv_buffer.c index c563283..601130e 100644 --- a/src/amd/vulkan/radv_buffer.c +++ b/src/amd/vulkan/radv_buffer.c @@ -31,8 +31,7 @@ #include "vk_common_entrypoints.h" void -radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device, - struct radeon_winsys_bo *bo, uint64_t size, +radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device, struct radeon_winsys_bo *bo, uint64_t size, uint64_t offset) { VkBufferCreateInfo createInfo = { @@ -53,8 +52,7 @@ radv_buffer_finish(struct radv_buffer *buffer) } static void -radv_destroy_buffer(struct radv_device *device, const VkAllocationCallbacks *pAllocator, - struct radv_buffer *buffer) +radv_destroy_buffer(struct radv_device *device, const VkAllocationCallbacks *pAllocator, struct radv_buffer *buffer) { if ((buffer->vk.create_flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) && buffer->bo) device->ws->buffer_destroy(device->ws, buffer->bo); @@ -80,8 +78,7 @@ radv_create_buffer(struct radv_device *device, const VkBufferCreateInfo *pCreate return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); #endif - buffer = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + buffer = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8, 
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (buffer == NULL) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -100,9 +97,8 @@ radv_create_buffer(struct radv_device *device, const VkBufferCreateInfo *pCreate if (replay_info && replay_info->opaqueCaptureAddress) replay_address = replay_info->opaqueCaptureAddress; - VkResult result = - device->ws->buffer_create(device->ws, align64(buffer->vk.size, 4096), 4096, 0, flags, - RADV_BO_PRIORITY_VIRTUAL, replay_address, &buffer->bo); + VkResult result = device->ws->buffer_create(device->ws, align64(buffer->vk.size, 4096), 4096, 0, flags, + RADV_BO_PRIORITY_VIRTUAL, replay_address, &buffer->bo); if (result != VK_SUCCESS) { radv_destroy_buffer(device, pAllocator, buffer); return vk_error(device, result); @@ -118,8 +114,8 @@ radv_create_buffer(struct radv_device *device, const VkBufferCreateInfo *pCreate } VKAPI_ATTR VkResult VKAPI_CALL -radv_CreateBuffer(VkDevice _device, const VkBufferCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer) +radv_CreateBuffer(VkDevice _device, const VkBufferCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, + VkBuffer *pBuffer) { RADV_FROM_HANDLE(radv_device, device, _device); return radv_create_buffer(device, pCreateInfo, pAllocator, pBuffer, false); @@ -138,8 +134,7 @@ radv_DestroyBuffer(VkDevice _device, VkBuffer _buffer, const VkAllocationCallbac } VKAPI_ATTR VkResult VKAPI_CALL -radv_BindBufferMemory2(VkDevice _device, uint32_t bindInfoCount, - const VkBindBufferMemoryInfo *pBindInfos) +radv_BindBufferMemory2(VkDevice _device, uint32_t bindInfoCount, const VkBindBufferMemoryInfo *pBindInfos) { RADV_FROM_HANDLE(radv_device, device, _device); @@ -159,8 +154,7 @@ radv_BindBufferMemory2(VkDevice _device, uint32_t bindInfoCount, vk_common_GetBufferMemoryRequirements2(_device, &info, &reqs); if (pBindInfos[i].memoryOffset + reqs.memoryRequirements.size > mem->alloc_size) { - return vk_errorf(device, VK_ERROR_UNKNOWN, - "Device memory object too small for the buffer.\n"); + return vk_errorf(device, VK_ERROR_UNKNOWN, "Device memory object too small for the buffer.\n"); } } @@ -172,9 +166,8 @@ radv_BindBufferMemory2(VkDevice _device, uint32_t bindInfoCount, } static void -radv_get_buffer_memory_requirements(struct radv_device *device, VkDeviceSize size, - VkBufferCreateFlags flags, VkBufferUsageFlags usage, - VkMemoryRequirements2 *pMemoryRequirements) +radv_get_buffer_memory_requirements(struct radv_device *device, VkDeviceSize size, VkBufferCreateFlags flags, + VkBufferUsageFlags usage, VkMemoryRequirements2 *pMemoryRequirements) { pMemoryRequirements->memoryRequirements.memoryTypeBits = ((1u << device->physical_device->memory_properties.memoryTypeCount) - 1u) & @@ -190,16 +183,13 @@ radv_get_buffer_memory_requirements(struct radv_device *device, VkDeviceSize siz * intersection is non-zero at least) */ if ((usage & VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT) && device->uses_device_generated_commands) - pMemoryRequirements->memoryRequirements.memoryTypeBits |= - device->physical_device->memory_types_32bit; + pMemoryRequirements->memoryRequirements.memoryTypeBits |= device->physical_device->memory_types_32bit; /* Force 32-bit address-space for descriptor buffers usage because they are passed to shaders * through 32-bit pointers. 
*/ - if (usage & (VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | - VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT)) - pMemoryRequirements->memoryRequirements.memoryTypeBits = - device->physical_device->memory_types_32bit; + if (usage & (VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT)) + pMemoryRequirements->memoryRequirements.memoryTypeBits = device->physical_device->memory_types_32bit; if (flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) pMemoryRequirements->memoryRequirements.alignment = 4096; @@ -211,14 +201,11 @@ radv_get_buffer_memory_requirements(struct radv_device *device, VkDeviceSize siz * be 64 byte aligned. */ if (usage & VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR) - pMemoryRequirements->memoryRequirements.alignment = - MAX2(pMemoryRequirements->memoryRequirements.alignment, 64); + pMemoryRequirements->memoryRequirements.alignment = MAX2(pMemoryRequirements->memoryRequirements.alignment, 64); - pMemoryRequirements->memoryRequirements.size = - align64(size, pMemoryRequirements->memoryRequirements.alignment); + pMemoryRequirements->memoryRequirements.size = align64(size, pMemoryRequirements->memoryRequirements.alignment); - vk_foreach_struct(ext, pMemoryRequirements->pNext) - { + vk_foreach_struct (ext, pMemoryRequirements->pNext) { switch (ext->sType) { case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext; @@ -233,8 +220,7 @@ radv_get_buffer_memory_requirements(struct radv_device *device, VkDeviceSize siz } VKAPI_ATTR void VKAPI_CALL -radv_GetDeviceBufferMemoryRequirements(VkDevice _device, - const VkDeviceBufferMemoryRequirements *pInfo, +radv_GetDeviceBufferMemoryRequirements(VkDevice _device, const VkDeviceBufferMemoryRequirements *pInfo, VkMemoryRequirements2 *pMemoryRequirements) { RADV_FROM_HANDLE(radv_device, device, _device); diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 5be68af..50ef321 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -25,19 +25,19 @@ * IN THE SOFTWARE. 
*/ +#include "meta/radv_meta.h" #include "radv_cs.h" #include "radv_debug.h" -#include "meta/radv_meta.h" #include "radv_private.h" #include "radv_radeon_winsys.h" #include "radv_shader.h" #include "sid.h" -#include "vk_format.h" -#include "vk_util.h" -#include "vk_enum_defines.h" #include "vk_common_entrypoints.h" -#include "vk_render_pass.h" +#include "vk_enum_defines.h" +#include "vk_format.h" #include "vk_framebuffer.h" +#include "vk_render_pass.h" +#include "vk_util.h" #include "ac_debug.h" #include "ac_shader_args.h" @@ -52,15 +52,13 @@ enum { RADV_PREFETCH_GS = (1 << 4), RADV_PREFETCH_PS = (1 << 5), RADV_PREFETCH_MS = (1 << 6), - RADV_PREFETCH_SHADERS = (RADV_PREFETCH_VS | RADV_PREFETCH_TCS | RADV_PREFETCH_TES | - RADV_PREFETCH_GS | RADV_PREFETCH_PS | RADV_PREFETCH_MS) + RADV_PREFETCH_SHADERS = (RADV_PREFETCH_VS | RADV_PREFETCH_TCS | RADV_PREFETCH_TES | RADV_PREFETCH_GS | + RADV_PREFETCH_PS | RADV_PREFETCH_MS) }; -static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - VkImageLayout src_layout, VkImageLayout dst_layout, - uint32_t src_family_index, uint32_t dst_family_index, - const VkImageSubresourceRange *range, +static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, + VkImageLayout src_layout, VkImageLayout dst_layout, uint32_t src_family_index, + uint32_t dst_family_index, const VkImageSubresourceRange *range, struct radv_sample_locations_state *sample_locs); static void @@ -79,8 +77,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dy dest_mask |= RADV_DYNAMIC_VIEWPORT; } - if (memcmp(&dest->vk.vp.viewports, &src->vk.vp.viewports, - src->vk.vp.viewport_count * sizeof(VkViewport))) { + if (memcmp(&dest->vk.vp.viewports, &src->vk.vp.viewports, src->vk.vp.viewport_count * sizeof(VkViewport))) { typed_memcpy(dest->vk.vp.viewports, src->vk.vp.viewports, src->vk.vp.viewport_count); typed_memcpy(dest->hw_vp.xform, src->hw_vp.xform, src->vk.vp.viewport_count); dest_mask |= RADV_DYNAMIC_VIEWPORT; @@ -93,8 +90,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dy dest_mask |= RADV_DYNAMIC_SCISSOR; } - if (memcmp(&dest->vk.vp.scissors, &src->vk.vp.scissors, - src->vk.vp.scissor_count * sizeof(VkRect2D))) { + if (memcmp(&dest->vk.vp.scissors, &src->vk.vp.scissors, src->vk.vp.scissor_count * sizeof(VkRect2D))) { typed_memcpy(dest->vk.vp.scissors, src->vk.vp.scissors, src->vk.vp.scissor_count); dest_mask |= RADV_DYNAMIC_SCISSOR; } @@ -108,8 +104,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dy } if (copy_mask & RADV_DYNAMIC_DISCARD_RECTANGLE) { - if (memcmp(&dest->vk.dr.rectangles, &src->vk.dr.rectangles, - src->vk.dr.rectangle_count * sizeof(VkRect2D))) { + if (memcmp(&dest->vk.dr.rectangles, &src->vk.dr.rectangles, src->vk.dr.rectangle_count * sizeof(VkRect2D))) { typed_memcpy(dest->vk.dr.rectangles, src->vk.dr.rectangles, src->vk.dr.rectangle_count); dest_mask |= RADV_DYNAMIC_DISCARD_RECTANGLE; } @@ -123,8 +118,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dy src->sample_location.count * sizeof(VkSampleLocationEXT))) { dest->sample_location.per_pixel = src->sample_location.per_pixel; dest->sample_location.grid_size = src->sample_location.grid_size; - typed_memcpy(dest->sample_location.locations, src->sample_location.locations, - src->sample_location.count); + typed_memcpy(dest->sample_location.locations, src->sample_location.locations, 
src->sample_location.count); dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS; } } @@ -166,12 +160,12 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dy } } -#define RADV_CMP_COPY(field, flag) \ - if (copy_mask & flag) { \ - if (dest->field != src->field) { \ - dest->field = src->field; \ - dest_mask |= flag; \ - } \ +#define RADV_CMP_COPY(field, flag) \ + if (copy_mask & flag) { \ + if (dest->field != src->field) { \ + dest->field = src->field; \ + dest_mask |= flag; \ + } \ } RADV_CMP_COPY(vk.ia.primitive_topology, RADV_DYNAMIC_PRIMITIVE_TOPOLOGY); @@ -251,8 +245,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dy cmd_buffer->state.dirty |= RADV_CMD_DIRTY_GUARDBAND; } - if (cmd_buffer->device->physical_device->rad_info.rbplus_allowed && - (dest_mask & RADV_DYNAMIC_COLOR_WRITE_MASK)) { + if (cmd_buffer->device->physical_device->rad_info.rbplus_allowed && (dest_mask & RADV_DYNAMIC_COLOR_WRITE_MASK)) { cmd_buffer->state.dirty |= RADV_CMD_DIRTY_RBPLUS; } } @@ -260,13 +253,11 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dy bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer) { - return cmd_buffer->qf == RADV_QUEUE_COMPUTE && - cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7; + return cmd_buffer->qf == RADV_QUEUE_COMPUTE && cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7; } enum amd_ip_type -radv_queue_family_to_ring(const struct radv_physical_device *physical_device, - enum radv_queue_family f) +radv_queue_family_to_ring(const struct radv_physical_device *physical_device, enum radv_queue_family f) { switch (f) { case RADV_QUEUE_GENERAL: @@ -285,8 +276,8 @@ radv_queue_family_to_ring(const struct radv_physical_device *physical_device, } static void -radv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va, - unsigned count, const uint32_t *data) +radv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va, unsigned count, + const uint32_t *data) { struct radeon_cmdbuf *cs = cmd_buffer->cs; @@ -300,8 +291,7 @@ radv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, unsigned engine_ } static void -radv_emit_clear_data(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va, - unsigned size) +radv_emit_clear_data(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va, unsigned size) { uint32_t *zeroes = alloca(size); memset(zeroes, 0, size); @@ -313,8 +303,7 @@ radv_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer) { struct radv_cmd_buffer *cmd_buffer = container_of(vk_cmd_buffer, struct radv_cmd_buffer, vk); - list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list) - { + list_for_each_entry_safe (struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list) { radv_rmv_log_command_buffer_bo_destroy(cmd_buffer->device, up->upload_bo); cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo); list_del(&up->list); @@ -346,20 +335,17 @@ radv_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer) } static VkResult -radv_create_cmd_buffer(struct vk_command_pool *pool, - struct vk_command_buffer **cmd_buffer_out) +radv_create_cmd_buffer(struct vk_command_pool *pool, struct vk_command_buffer **cmd_buffer_out) { struct radv_device *device = container_of(pool->base.device, struct radv_device, vk); struct radv_cmd_buffer *cmd_buffer; unsigned ring; - cmd_buffer = 
vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (cmd_buffer == NULL) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - VkResult result = - vk_command_buffer_init(pool, &cmd_buffer->vk, &radv_cmd_buffer_ops, 0); + VkResult result = vk_command_buffer_init(pool, &cmd_buffer->vk, &radv_cmd_buffer_ops, 0); if (result != VK_SUCCESS) { vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer); return result; @@ -373,19 +359,16 @@ radv_create_cmd_buffer(struct vk_command_pool *pool, ring = radv_queue_family_to_ring(device->physical_device, cmd_buffer->qf); - cmd_buffer->cs = device->ws->cs_create( - device->ws, ring, cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); + cmd_buffer->cs = device->ws->cs_create(device->ws, ring, cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); if (!cmd_buffer->cs) { radv_destroy_cmd_buffer(&cmd_buffer->vk); return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); } - vk_object_base_init(&device->vk, &cmd_buffer->meta_push_descriptors.base, - VK_OBJECT_TYPE_DESCRIPTOR_SET); + vk_object_base_init(&device->vk, &cmd_buffer->meta_push_descriptors.base, VK_OBJECT_TYPE_DESCRIPTOR_SET); for (unsigned i = 0; i < MAX_BIND_POINTS; i++) - vk_object_base_init(&device->vk, &cmd_buffer->descriptors[i].push_set.set.base, - VK_OBJECT_TYPE_DESCRIPTOR_SET); + vk_object_base_init(&device->vk, &cmd_buffer->descriptors[i].push_set.set.base, VK_OBJECT_TYPE_DESCRIPTOR_SET); *cmd_buffer_out = &cmd_buffer->vk; @@ -399,8 +382,7 @@ radv_cmd_buffer_reset_rendering(struct radv_cmd_buffer *cmd_buffer) } static void -radv_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, - UNUSED VkCommandBufferResetFlags flags) +radv_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, UNUSED VkCommandBufferResetFlags flags) { struct radv_cmd_buffer *cmd_buffer = container_of(vk_cmd_buffer, struct radv_cmd_buffer, vk); @@ -410,8 +392,7 @@ radv_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, if (cmd_buffer->gang.cs) cmd_buffer->device->ws->cs_reset(cmd_buffer->gang.cs); - list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list) - { + list_for_each_entry_safe (struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list) { radv_rmv_log_command_buffer_bo_destroy(cmd_buffer->device, up->upload_bo); cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo); list_del(&up->list); @@ -468,11 +449,10 @@ radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer, uint64_t m new_size = MAX2(min_needed, 16 * 1024); new_size = MAX2(new_size, 2 * cmd_buffer->upload.size); - VkResult result = - device->ws->buffer_create(device->ws, new_size, 4096, device->ws->cs_domain(device->ws), - RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC, - RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &bo); + VkResult result = device->ws->buffer_create( + device->ws, new_size, 4096, device->ws->cs_domain(device->ws), + RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC, + RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &bo); if (result != VK_SUCCESS) { vk_command_buffer_set_error(&cmd_buffer->vk, result); @@ -502,15 +482,13 @@ radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer, uint64_t m vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_DEVICE_MEMORY); return false; } - 
radv_rmv_log_command_buffer_bo_create(device, cmd_buffer->upload.upload_bo, 0, - cmd_buffer->upload.size, 0); + radv_rmv_log_command_buffer_bo_create(device, cmd_buffer->upload.upload_bo, 0, cmd_buffer->upload.size, 0); return true; } bool -radv_cmd_buffer_upload_alloc_aligned(struct radv_cmd_buffer *cmd_buffer, unsigned size, - unsigned alignment, +radv_cmd_buffer_upload_alloc_aligned(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned alignment, unsigned *out_offset, void **ptr) { assert(size % 4 == 0); @@ -542,15 +520,13 @@ radv_cmd_buffer_upload_alloc_aligned(struct radv_cmd_buffer *cmd_buffer, unsigne } bool -radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, - unsigned *out_offset, void **ptr) +radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned *out_offset, void **ptr) { return radv_cmd_buffer_upload_alloc_aligned(cmd_buffer, size, 0, out_offset, ptr); } bool -radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, const void *data, - unsigned *out_offset) +radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, const void *data, unsigned *out_offset) { uint8_t *ptr; @@ -569,8 +545,7 @@ radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer) struct radeon_cmdbuf *cs = cmd_buffer->cs; uint64_t va; - if (cmd_buffer->qf != RADV_QUEUE_GENERAL && - cmd_buffer->qf != RADV_QUEUE_COMPUTE) + if (cmd_buffer->qf != RADV_QUEUE_GENERAL && cmd_buffer->qf != RADV_QUEUE_COMPUTE) return; va = radv_buffer_get_va(device->trace_bo); @@ -588,29 +563,26 @@ radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer) static void radv_gang_barrier(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_stage_mask, - VkPipelineStageFlags2 dst_stage_mask) + VkPipelineStageFlags2 dst_stage_mask) { /* Update flush bits from the main cmdbuf, except the stage flush. */ cmd_buffer->gang.flush_bits |= cmd_buffer->state.flush_bits & RADV_CMD_FLUSH_ALL_COMPUTE & ~RADV_CMD_FLAG_CS_PARTIAL_FLUSH; /* Add stage flush only when necessary. */ - if (src_stage_mask & - (VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT | VK_PIPELINE_STAGE_2_TRANSFER_BIT | - VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) + if (src_stage_mask & (VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT | VK_PIPELINE_STAGE_2_TRANSFER_BIT | + VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) cmd_buffer->gang.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH; /* Block task shaders when we have to wait for CP DMA on the GFX cmdbuf. */ if (src_stage_mask & - (VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_CLEAR_BIT | - VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) + (VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_CLEAR_BIT | VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | + VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) dst_stage_mask |= cmd_buffer->state.dma_is_busy ? VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT : 0; /* Increment the GFX/ACE semaphore when task shaders are blocked. 
*/ - if (dst_stage_mask & - (VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR | VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | - VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT)) + if (dst_stage_mask & (VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR | VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | + VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT)) cmd_buffer->gang.sem.leader_value++; } @@ -621,9 +593,8 @@ radv_gang_cache_flush(struct radv_cmd_buffer *cmd_buffer) const uint32_t flush_bits = cmd_buffer->gang.flush_bits; enum rgp_flush_bits sqtt_flush_bits = 0; - si_cs_emit_cache_flush(cmd_buffer->device->ws, ace_cs, - cmd_buffer->device->physical_device->rad_info.gfx_level, NULL, 0, true, - flush_bits, &sqtt_flush_bits, 0); + si_cs_emit_cache_flush(cmd_buffer->device->ws, ace_cs, cmd_buffer->device->physical_device->rad_info.gfx_level, NULL, + 0, true, flush_bits, &sqtt_flush_bits, 0); cmd_buffer->gang.flush_bits = 0; } @@ -647,8 +618,7 @@ radv_gang_sem_create(struct radv_cmd_buffer *cmd_buffer) static bool radv_gang_leader_sem_dirty(const struct radv_cmd_buffer *cmd_buffer) { - return cmd_buffer->gang.sem.leader_value != - cmd_buffer->gang.sem.emitted_leader_value; + return cmd_buffer->gang.sem.leader_value != cmd_buffer->gang.sem.emitted_leader_value; } ALWAYS_INLINE static bool @@ -666,11 +636,10 @@ radv_flush_gang_leader_semaphore(struct radv_cmd_buffer *cmd_buffer) ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 12); /* GFX writes a value to the semaphore which ACE can wait for.*/ - si_cs_emit_write_event_eop( - cmd_buffer->cs, cmd_buffer->device->physical_device->rad_info.gfx_level, - radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, - EOP_DATA_SEL_VALUE_32BIT, cmd_buffer->gang.sem.va, - cmd_buffer->gang.sem.leader_value, cmd_buffer->gfx9_eop_bug_va); + si_cs_emit_write_event_eop(cmd_buffer->cs, cmd_buffer->device->physical_device->rad_info.gfx_level, + radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, + EOP_DATA_SEL_VALUE_32BIT, cmd_buffer->gang.sem.va, cmd_buffer->gang.sem.leader_value, + cmd_buffer->gfx9_eop_bug_va); cmd_buffer->gang.sem.emitted_leader_value = cmd_buffer->gang.sem.leader_value; @@ -686,8 +655,8 @@ radv_wait_gang_leader(struct radv_cmd_buffer *cmd_buffer) radeon_check_space(cmd_buffer->device->ws, ace_cs, 7); /* ACE waits for the semaphore which GFX wrote. */ - radv_cp_wait_mem(ace_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, cmd_buffer->gang.sem.va, - cmd_buffer->gang.sem.leader_value, 0xffffffff); + radv_cp_wait_mem(ace_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, cmd_buffer->gang.sem.va, cmd_buffer->gang.sem.leader_value, + 0xffffffff); } static struct radeon_cmdbuf * @@ -695,8 +664,8 @@ radv_gang_create(struct radv_cmd_buffer *cmd_buffer) { assert(!cmd_buffer->gang.cs); struct radv_device *device = cmd_buffer->device; - struct radeon_cmdbuf *ace_cs = device->ws->cs_create( - device->ws, AMD_IP_COMPUTE, cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); + struct radeon_cmdbuf *ace_cs = + device->ws->cs_create(device->ws, AMD_IP_COMPUTE, cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); if (!ace_cs) vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -756,8 +725,7 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flu assert(flags & (RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)); /* Force wait for graphics or compute engines to be idle. 
*/ - si_cs_emit_cache_flush(device->ws, cmd_buffer->cs, - device->physical_device->rad_info.gfx_level, + si_cs_emit_cache_flush(device->ws, cmd_buffer->cs, device->physical_device->rad_info.gfx_level, &cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va, radv_cmd_buffer_uses_mec(cmd_buffer), flags, &sqtt_flush_bits, cmd_buffer->gfx9_eop_bug_va); @@ -765,9 +733,8 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flu if (cmd_buffer->state.graphics_pipeline && (flags & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) && radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) { /* Force wait for compute engines to be idle on the internal cmdbuf. */ - si_cs_emit_cache_flush(device->ws, cmd_buffer->gang.cs, - device->physical_device->rad_info.gfx_level, NULL, 0, true, - RADV_CMD_FLAG_CS_PARTIAL_FLUSH, &sqtt_flush_bits, 0); + si_cs_emit_cache_flush(device->ws, cmd_buffer->gang.cs, device->physical_device->rad_info.gfx_level, NULL, 0, + true, RADV_CMD_FLAG_CS_PARTIAL_FLUSH, &sqtt_flush_bits, 0); } } @@ -842,8 +809,7 @@ void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point, struct radv_descriptor_set *set, unsigned idx) { - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, bind_point); + struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point); descriptors_state->sets[idx] = set; @@ -854,15 +820,13 @@ radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint static void radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point) { - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, bind_point); + struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point); struct radv_device *device = cmd_buffer->device; uint32_t data[MAX_SETS * 2] = {0}; uint64_t va; va = radv_buffer_get_va(device->trace_bo) + 40; - u_foreach_bit(i, descriptors_state->valid) - { + u_foreach_bit (i, descriptors_state->valid) { struct radv_descriptor_set *set = descriptors_state->sets[i]; data[i * 2] = (uint64_t)(uintptr_t)set; data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32; @@ -878,8 +842,8 @@ radv_get_user_sgpr(const struct radv_shader *shader, int idx) } static void -radv_emit_userdata_address(struct radv_device *device, struct radeon_cmdbuf *cs, - struct radv_shader *shader, uint32_t base_reg, int idx, uint64_t va) +radv_emit_userdata_address(struct radv_device *device, struct radeon_cmdbuf *cs, struct radv_shader *shader, + uint32_t base_reg, int idx, uint64_t va) { const struct radv_userdata_info *loc = &shader->info.user_sgprs_locs.shader_data[idx]; @@ -907,9 +871,8 @@ radv_descriptor_get_va(const struct radv_descriptor_state *descriptors_state, un } static void -radv_emit_descriptor_pointers(struct radv_device *device, struct radeon_cmdbuf *cs, - struct radv_shader *shader, uint32_t sh_base, - struct radv_descriptor_state *descriptors_state) +radv_emit_descriptor_pointers(struct radv_device *device, struct radeon_cmdbuf *cs, struct radv_shader *shader, + uint32_t sh_base, struct radv_descriptor_state *descriptors_state) { struct radv_userdata_locations *locs = &shader->info.user_sgprs_locs; unsigned mask = locs->descriptor_sets_enabled; @@ -939,10 +902,9 @@ radv_get_rasterization_prim(struct radv_cmd_buffer *cmd_buffer) const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader; const struct radv_dynamic_state *d = 
&cmd_buffer->state.dynamic; - if (cmd_buffer->state.active_stages & (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | - VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | - VK_SHADER_STAGE_GEOMETRY_BIT | - VK_SHADER_STAGE_MESH_BIT_EXT)) { + if (cmd_buffer->state.active_stages & + (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | + VK_SHADER_STAGE_GEOMETRY_BIT | VK_SHADER_STAGE_MESH_BIT_EXT)) { /* Ignore dynamic primitive topology for TES/GS/MS stages. */ return cmd_buffer->state.rast_prim; } @@ -999,8 +961,8 @@ radv_get_ps_iter_samples(struct radv_cmd_buffer *cmd_buffer) * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*). */ static void -radv_convert_user_sample_locs(const struct radv_sample_locations_state *state, - uint32_t x, uint32_t y, VkOffset2D *sample_locs) +radv_convert_user_sample_locs(const struct radv_sample_locations_state *state, uint32_t x, uint32_t y, + VkOffset2D *sample_locs) { uint32_t x_offset = x % state->grid_size.width; uint32_t y_offset = y % state->grid_size.height; @@ -1029,8 +991,7 @@ radv_convert_user_sample_locs(const struct radv_sample_locations_state *state, * locations. */ static void -radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs, - uint32_t *sample_locs_pixel) +radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs, uint32_t *sample_locs_pixel) { for (uint32_t i = 0; i < num_samples; i++) { uint32_t sample_reg_idx = i / 4; @@ -1051,8 +1012,7 @@ radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs, * sample locations. */ static uint64_t -radv_compute_centroid_priority(struct radv_cmd_buffer *cmd_buffer, VkOffset2D *sample_locs, - uint32_t num_samples) +radv_compute_centroid_priority(struct radv_cmd_buffer *cmd_buffer, VkOffset2D *sample_locs, uint32_t num_samples) { uint32_t *centroid_priorities = alloca(num_samples * sizeof(*centroid_priorities)); uint32_t sample_mask = num_samples - 1; @@ -1119,32 +1079,20 @@ radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer) switch (num_samples) { case 2: case 4: - radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, - sample_locs_pixel[0][0]); - radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, - sample_locs_pixel[1][0]); - radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, - sample_locs_pixel[2][0]); - radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, - sample_locs_pixel[3][0]); + radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]); + radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]); + radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]); + radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]); break; case 8: - radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, - sample_locs_pixel[0][0]); - radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, - sample_locs_pixel[1][0]); - radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, - sample_locs_pixel[2][0]); - radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, - sample_locs_pixel[3][0]); - radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, - sample_locs_pixel[0][1]); - radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, - sample_locs_pixel[1][1]); - radeon_set_context_reg(cs, 
R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, - sample_locs_pixel[2][1]); - radeon_set_context_reg(cs, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, - sample_locs_pixel[3][1]); + radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]); + radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]); + radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]); + radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]); + radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, sample_locs_pixel[0][1]); + radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, sample_locs_pixel[1][1]); + radeon_set_context_reg(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, sample_locs_pixel[2][1]); + radeon_set_context_reg(cs, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, sample_locs_pixel[3][1]); break; default: unreachable("invalid number of samples"); @@ -1156,9 +1104,8 @@ radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer) } static void -radv_emit_inline_push_consts(struct radv_device *device, struct radeon_cmdbuf *cs, - const struct radv_shader *shader, uint32_t base_reg, int idx, - uint32_t *values) +radv_emit_inline_push_consts(struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_shader *shader, + uint32_t base_reg, int idx, uint32_t *values) { const struct radv_userdata_info *loc = &shader->info.user_sgprs_locs.shader_data[idx]; @@ -1195,10 +1142,8 @@ radv_gfx10_compute_bin_size(struct radv_cmd_buffer *cmd_buffer) const unsigned pipe_count = MAX2(rb_count, pdevice->rad_info.num_tcc_blocks); const unsigned db_tag_part = (db_tag_count * rb_count / pipe_count) * db_tag_size * pipe_count; - const unsigned color_tag_part = - (color_tag_count * rb_count / pipe_count) * color_tag_size * pipe_count; - const unsigned fmask_tag_part = - (fmask_tag_count * rb_count / pipe_count) * fmask_tag_size * pipe_count; + const unsigned color_tag_part = (color_tag_count * rb_count / pipe_count) * color_tag_size * pipe_count; + const unsigned fmask_tag_part = (fmask_tag_count * rb_count / pipe_count) * fmask_tag_size * pipe_count; const unsigned total_samples = radv_get_rasterization_samples(cmd_buffer); const unsigned samples_log = util_logbase2_ceil(total_samples); @@ -1234,9 +1179,8 @@ radv_gfx10_compute_bin_size(struct radv_cmd_buffer *cmd_buffer) if (fmask_bytes_per_pixel) { const unsigned fmask_pixel_count_log = util_logbase2(fmask_tag_part / fmask_bytes_per_pixel); - const VkExtent2D fmask_extent = - (VkExtent2D){.width = 1ull << ((fmask_pixel_count_log + 1) / 2), - .height = 1ull << (color_pixel_count_log / 2)}; + const VkExtent2D fmask_extent = (VkExtent2D){.width = 1ull << ((fmask_pixel_count_log + 1) / 2), + .height = 1ull << (color_pixel_count_log / 2)}; if (fmask_extent.width * fmask_extent.height < extent.width * extent.height) extent = fmask_extent; @@ -1250,8 +1194,8 @@ radv_gfx10_compute_bin_size(struct radv_cmd_buffer *cmd_buffer) const unsigned db_pixel_count_log = util_logbase2(db_tag_part / db_bytes_per_pixel); - const VkExtent2D db_extent = (VkExtent2D){.width = 1ull << ((db_pixel_count_log + 1) / 2), - .height = 1ull << (color_pixel_count_log / 2)}; + const VkExtent2D db_extent = + (VkExtent2D){.width = 1ull << ((db_pixel_count_log + 1) / 2), .height = 1ull << (color_pixel_count_log / 2)}; if (db_extent.width * db_extent.height < extent.width * extent.height) extent = db_extent; @@ -1481,8 +1425,7 
@@ radv_gfx9_compute_bin_size(struct radv_cmd_buffer *cmd_buffer) VkExtent2D extent = {512, 512}; - unsigned log_num_rb_per_se = - util_logbase2_ceil(pdevice->rad_info.max_render_backends / pdevice->rad_info.max_se); + unsigned log_num_rb_per_se = util_logbase2_ceil(pdevice->rad_info.max_render_backends / pdevice->rad_info.max_se); unsigned log_num_se = util_logbase2_ceil(pdevice->rad_info.max_se); unsigned total_samples = radv_get_rasterization_samples(cmd_buffer); @@ -1555,12 +1498,10 @@ radv_get_disabled_binning_state(struct radv_cmd_buffer *cmd_buffer) min_bytes_per_pixel = bytes; } - pa_sc_binner_cntl_0 = - S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_NEW_SC) | S_028C44_BIN_SIZE_X(0) | - S_028C44_BIN_SIZE_Y(0) | S_028C44_BIN_SIZE_X_EXTEND(2) | /* 128 */ - S_028C44_BIN_SIZE_Y_EXTEND(min_bytes_per_pixel <= 4 ? 2 : 1) | /* 128 or 64 */ - S_028C44_DISABLE_START_OF_PRIM(1) | - S_028C44_FLUSH_ON_BINNING_TRANSITION(1); + pa_sc_binner_cntl_0 = S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_NEW_SC) | S_028C44_BIN_SIZE_X(0) | + S_028C44_BIN_SIZE_Y(0) | S_028C44_BIN_SIZE_X_EXTEND(2) | /* 128 */ + S_028C44_BIN_SIZE_Y_EXTEND(min_bytes_per_pixel <= 4 ? 2 : 1) | /* 128 or 64 */ + S_028C44_DISABLE_START_OF_PRIM(1) | S_028C44_FLUSH_ON_BINNING_TRANSITION(1); } else { pa_sc_binner_cntl_0 = S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) | S_028C44_DISABLE_START_OF_PRIM(1) | @@ -1589,19 +1530,18 @@ radv_get_binning_state(struct radv_cmd_buffer *cmd_buffer) if (device->pbb_allowed && bin_size.width && bin_size.height) { struct radv_binning_settings *settings = &device->physical_device->binning_settings; - pa_sc_binner_cntl_0 = S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) | - S_028C44_BIN_SIZE_X(bin_size.width == 16) | - S_028C44_BIN_SIZE_Y(bin_size.height == 16) | - S_028C44_BIN_SIZE_X_EXTEND(util_logbase2(MAX2(bin_size.width, 32)) - 5) | - S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) | - S_028C44_CONTEXT_STATES_PER_BIN(settings->context_states_per_bin - 1) | - S_028C44_PERSISTENT_STATES_PER_BIN(settings->persistent_states_per_bin - 1) | - S_028C44_DISABLE_START_OF_PRIM(1) | - S_028C44_FPOVS_PER_BATCH(settings->fpovs_per_batch) | - S_028C44_OPTIMAL_BIN_SELECTION(1) | - S_028C44_FLUSH_ON_BINNING_TRANSITION(device->physical_device->rad_info.family == CHIP_VEGA12 || - device->physical_device->rad_info.family == CHIP_VEGA20 || - device->physical_device->rad_info.family >= CHIP_RAVEN2); + pa_sc_binner_cntl_0 = + S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) | S_028C44_BIN_SIZE_X(bin_size.width == 16) | + S_028C44_BIN_SIZE_Y(bin_size.height == 16) | + S_028C44_BIN_SIZE_X_EXTEND(util_logbase2(MAX2(bin_size.width, 32)) - 5) | + S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) | + S_028C44_CONTEXT_STATES_PER_BIN(settings->context_states_per_bin - 1) | + S_028C44_PERSISTENT_STATES_PER_BIN(settings->persistent_states_per_bin - 1) | + S_028C44_DISABLE_START_OF_PRIM(1) | S_028C44_FPOVS_PER_BATCH(settings->fpovs_per_batch) | + S_028C44_OPTIMAL_BIN_SELECTION(1) | + S_028C44_FLUSH_ON_BINNING_TRANSITION(device->physical_device->rad_info.family == CHIP_VEGA12 || + device->physical_device->rad_info.family == CHIP_VEGA20 || + device->physical_device->rad_info.family >= CHIP_RAVEN2); } else { pa_sc_binner_cntl_0 = radv_get_disabled_binning_state(cmd_buffer); } @@ -1752,8 +1692,7 @@ radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer) case V_028C70_COLOR_8_8: case V_028C70_COLOR_8_8_8_8: /* For 1 and 2-channel formats, use the superset 
thereof. */ - if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || - spi_format == V_028714_SPI_SHADER_UINT16_ABGR || + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || spi_format == V_028714_SPI_SHADER_UINT16_ABGR || spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4); @@ -1793,10 +1732,8 @@ radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer) case V_028C70_COLOR_16: case V_028C70_COLOR_16_16: /* For 1-channel formats, use the superset thereof. */ - if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || - spi_format == V_028714_SPI_SHADER_SNORM16_ABGR || - spi_format == V_028714_SPI_SHADER_UINT16_ABGR || - spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { + if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || spi_format == V_028714_SPI_SHADER_SNORM16_ABGR || + spi_format == V_028714_SPI_SHADER_UINT16_ABGR || spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { if (swap == V_028C70_SWAP_STD || swap == V_028C70_SWAP_STD_REV) sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4); else @@ -1871,13 +1808,11 @@ radv_emit_ps_epilog_state(struct radv_cmd_buffer *cmd_buffer, struct radv_shader assert((ps_epilog->va >> 32) == cmd_buffer->device->physical_device->rad_info.address32_hi); - struct radv_userdata_info *loc = - &ps_shader->info.user_sgprs_locs.shader_data[AC_UD_PS_EPILOG_PC]; + struct radv_userdata_info *loc = &ps_shader->info.user_sgprs_locs.shader_data[AC_UD_PS_EPILOG_PC]; uint32_t base_reg = ps_shader->info.user_data_0; assert(loc->sgpr_idx != -1); assert(loc->num_sgprs == 1); - radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, - ps_epilog->va, false); + radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, ps_epilog->va, false); cmd_buffer->shader_upload_seq = MAX2(cmd_buffer->shader_upload_seq, ps_epilog->upload_seq); @@ -1894,12 +1829,12 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer) return; if (cmd_buffer->state.emitted_graphics_pipeline) { - if (radv_rast_prim_is_points_or_lines(cmd_buffer->state.emitted_graphics_pipeline->rast_prim) != radv_rast_prim_is_points_or_lines(pipeline->rast_prim)) + if (radv_rast_prim_is_points_or_lines(cmd_buffer->state.emitted_graphics_pipeline->rast_prim) != + radv_rast_prim_is_points_or_lines(pipeline->rast_prim)) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_GUARDBAND; if (cmd_buffer->state.emitted_graphics_pipeline->custom_blend_mode != pipeline->custom_blend_mode) - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP | - RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP_ENABLE; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP | RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP_ENABLE; if (cmd_buffer->state.emitted_graphics_pipeline->ms.min_sample_shading != pipeline->ms.min_sample_shading || cmd_buffer->state.emitted_graphics_pipeline->uses_out_of_order_rast != pipeline->uses_out_of_order_rast || @@ -1914,8 +1849,7 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE; } - if (cmd_buffer->state.emitted_graphics_pipeline->db_shader_control != - pipeline->db_shader_control) + if (cmd_buffer->state.emitted_graphics_pipeline->db_shader_control != pipeline->db_shader_control) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE; if (cmd_buffer->state.emitted_graphics_pipeline->db_render_control != pipeline->db_render_control) @@ -1937,7 +1871,7 @@ radv_emit_graphics_pipeline(struct 
radv_cmd_buffer *cmd_buffer) if ((!cmd_buffer->state.emitted_graphics_pipeline || cmd_buffer->state.emitted_graphics_pipeline->base.shaders[MESA_SHADER_FRAGMENT] != - cmd_buffer->state.graphics_pipeline->base.shaders[MESA_SHADER_FRAGMENT]) && + cmd_buffer->state.graphics_pipeline->base.shaders[MESA_SHADER_FRAGMENT]) && (settings->context_states_per_bin > 1 || settings->persistent_states_per_bin > 1)) { /* Break the batch on PS changes. */ radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); @@ -1960,8 +1894,7 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer) } if (cmd_buffer->state.gs_copy_shader) { - radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, - cmd_buffer->state.gs_copy_shader->bo); + radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->state.gs_copy_shader->bo); } if (unlikely(cmd_buffer->device->trace_bo)) @@ -1978,8 +1911,7 @@ radv_get_depth_clip_enable(struct radv_cmd_buffer *cmd_buffer) const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; return d->vk.rs.depth_clip_enable == VK_MESA_DEPTH_CLIP_ENABLE_TRUE || - (d->vk.rs.depth_clip_enable == VK_MESA_DEPTH_CLIP_ENABLE_NOT_CLAMP && - !d->vk.rs.depth_clamp_enable); + (d->vk.rs.depth_clip_enable == VK_MESA_DEPTH_CLIP_ENABLE_NOT_CLAMP && !d->vk.rs.depth_clamp_enable); } static enum radv_depth_clamp_mode @@ -1995,8 +1927,7 @@ radv_get_depth_clamp_mode(struct radv_cmd_buffer *cmd_buffer) /* For optimal performance, depth clamping should always be enabled except if the application * disables clamping explicitly or uses depth values outside of the [0.0, 1.0] range. */ - if (!depth_clip_enable || - device->vk.enabled_extensions.EXT_depth_range_unrestricted) { + if (!depth_clip_enable || device->vk.enabled_extensions.EXT_depth_range_unrestricted) { mode = RADV_DEPTH_CLAMP_MODE_DISABLED; } else { mode = RADV_DEPTH_CLAMP_MODE_ZERO_TO_ONE; @@ -2013,8 +1944,7 @@ radv_emit_viewport(struct radv_cmd_buffer *cmd_buffer) enum radv_depth_clamp_mode depth_clamp_mode = radv_get_depth_clamp_mode(cmd_buffer); assert(d->vk.vp.viewport_count); - radeon_set_context_reg_seq(cmd_buffer->cs, R_02843C_PA_CL_VPORT_XSCALE, - d->vk.vp.viewport_count * 6); + radeon_set_context_reg_seq(cmd_buffer->cs, R_02843C_PA_CL_VPORT_XSCALE, d->vk.vp.viewport_count * 6); for (unsigned i = 0; i < d->vk.vp.viewport_count; i++) { radeon_emit(cmd_buffer->cs, fui(d->hw_vp.xform[i].scale[0])); @@ -2034,8 +1964,7 @@ radv_emit_viewport(struct radv_cmd_buffer *cmd_buffer) radeon_emit(cmd_buffer->cs, fui(translate_z)); } - radeon_set_context_reg_seq(cmd_buffer->cs, R_0282D0_PA_SC_VPORT_ZMIN_0, - d->vk.vp.viewport_count * 2); + radeon_set_context_reg_seq(cmd_buffer->cs, R_0282D0_PA_SC_VPORT_ZMIN_0, d->vk.vp.viewport_count * 2); for (unsigned i = 0; i < d->vk.vp.viewport_count; i++) { float zmin, zmax; @@ -2092,8 +2021,7 @@ radv_emit_discard_rectangle(struct radv_cmd_buffer *cmd_buffer) cliprect_rule |= 1u << i; } - radeon_set_context_reg_seq(cmd_buffer->cs, R_028210_PA_SC_CLIPRECT_0_TL, - d->vk.dr.rectangle_count * 2); + radeon_set_context_reg_seq(cmd_buffer->cs, R_028210_PA_SC_CLIPRECT_0_TL, d->vk.dr.rectangle_count * 2); for (unsigned i = 0; i < d->vk.dr.rectangle_count; ++i) { VkRect2D rect = d->vk.dr.rectangles[i]; radeon_emit(cmd_buffer->cs, S_028210_TL_X(rect.offset.x) | S_028210_TL_Y(rect.offset.y)); @@ -2156,10 +2084,10 @@ radv_emit_depth_bias(struct radv_cmd_buffer *cmd_buffer) unsigned slope = fui(d->vk.rs.depth_bias.slope * 16.0f); radeon_set_context_reg_seq(cmd_buffer->cs, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 5); - 
radeon_emit(cmd_buffer->cs, fui(d->vk.rs.depth_bias.clamp)); /* CLAMP */ - radeon_emit(cmd_buffer->cs, slope); /* FRONT SCALE */ + radeon_emit(cmd_buffer->cs, fui(d->vk.rs.depth_bias.clamp)); /* CLAMP */ + radeon_emit(cmd_buffer->cs, slope); /* FRONT SCALE */ radeon_emit(cmd_buffer->cs, fui(d->vk.rs.depth_bias.constant)); /* FRONT OFFSET */ - radeon_emit(cmd_buffer->cs, slope); /* BACK SCALE */ + radeon_emit(cmd_buffer->cs, slope); /* BACK SCALE */ radeon_emit(cmd_buffer->cs, fui(d->vk.rs.depth_bias.constant)); /* BACK OFFSET */ } @@ -2185,25 +2113,23 @@ radv_get_pa_su_sc_mode_cntl(const struct radv_cmd_buffer *cmd_buffer) const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; unsigned pa_su_sc_mode_cntl; - pa_su_sc_mode_cntl = S_028814_CULL_FRONT(!!(d->vk.rs.cull_mode & VK_CULL_MODE_FRONT_BIT)) | - S_028814_CULL_BACK(!!(d->vk.rs.cull_mode & VK_CULL_MODE_BACK_BIT)) | - S_028814_FACE(d->vk.rs.front_face) | - S_028814_POLY_OFFSET_FRONT_ENABLE(d->vk.rs.depth_bias.enable) | - S_028814_POLY_OFFSET_BACK_ENABLE(d->vk.rs.depth_bias.enable) | - S_028814_POLY_OFFSET_PARA_ENABLE(d->vk.rs.depth_bias.enable) | - S_028814_POLY_MODE(d->vk.rs.polygon_mode != V_028814_X_DRAW_TRIANGLES) | - S_028814_POLYMODE_FRONT_PTYPE(d->vk.rs.polygon_mode) | - S_028814_POLYMODE_BACK_PTYPE(d->vk.rs.polygon_mode) | - S_028814_PROVOKING_VTX_LAST(d->vk.rs.provoking_vertex == - VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT); + pa_su_sc_mode_cntl = + S_028814_CULL_FRONT(!!(d->vk.rs.cull_mode & VK_CULL_MODE_FRONT_BIT)) | + S_028814_CULL_BACK(!!(d->vk.rs.cull_mode & VK_CULL_MODE_BACK_BIT)) | S_028814_FACE(d->vk.rs.front_face) | + S_028814_POLY_OFFSET_FRONT_ENABLE(d->vk.rs.depth_bias.enable) | + S_028814_POLY_OFFSET_BACK_ENABLE(d->vk.rs.depth_bias.enable) | + S_028814_POLY_OFFSET_PARA_ENABLE(d->vk.rs.depth_bias.enable) | + S_028814_POLY_MODE(d->vk.rs.polygon_mode != V_028814_X_DRAW_TRIANGLES) | + S_028814_POLYMODE_FRONT_PTYPE(d->vk.rs.polygon_mode) | S_028814_POLYMODE_BACK_PTYPE(d->vk.rs.polygon_mode) | + S_028814_PROVOKING_VTX_LAST(d->vk.rs.provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT); if (gfx_level >= GFX10) { /* Ensure that SC processes the primitive group in the same order as PA produced them. Needed * when either POLY_MODE or PERPENDICULAR_ENDCAP_ENA is set. 
*/ - pa_su_sc_mode_cntl |= S_028814_KEEP_TOGETHER_ENABLE( - d->vk.rs.polygon_mode != V_028814_X_DRAW_TRIANGLES || - d->vk.rs.line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT); + pa_su_sc_mode_cntl |= + S_028814_KEEP_TOGETHER_ENABLE(d->vk.rs.polygon_mode != V_028814_X_DRAW_TRIANGLES || + d->vk.rs.line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT); } return pa_su_sc_mode_cntl; @@ -2223,8 +2149,7 @@ radv_emit_provoking_vertex_mode(struct radv_cmd_buffer *cmd_buffer) const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader; const unsigned stage = last_vgt_shader->info.stage; const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; - const struct radv_userdata_info *loc = - radv_get_user_sgpr(last_vgt_shader, AC_UD_NGG_PROVOKING_VTX); + const struct radv_userdata_info *loc = radv_get_user_sgpr(last_vgt_shader, AC_UD_NGG_PROVOKING_VTX); unsigned provoking_vtx = 0; uint32_t base_reg; @@ -2233,8 +2158,7 @@ radv_emit_provoking_vertex_mode(struct radv_cmd_buffer *cmd_buffer) if (d->vk.rs.provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT) { if (stage == MESA_SHADER_VERTEX) { - provoking_vtx = si_conv_prim_to_gs_out(d->vk.ia.primitive_topology, - last_vgt_shader->info.is_ngg); + provoking_vtx = si_conv_prim_to_gs_out(d->vk.ia.primitive_topology, last_vgt_shader->info.is_ngg); } else { assert(stage == MESA_SHADER_GEOMETRY); provoking_vtx = last_vgt_shader->info.gs.vertices_in - 1; @@ -2249,19 +2173,17 @@ static void radv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer) { const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader; - const struct radv_userdata_info *loc = - radv_get_user_sgpr(last_vgt_shader, AC_UD_NUM_VERTS_PER_PRIM); + const struct radv_userdata_info *loc = radv_get_user_sgpr(last_vgt_shader, AC_UD_NUM_VERTS_PER_PRIM); const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; uint32_t base_reg; assert(!cmd_buffer->state.mesh_shading); if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) { - radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cmd_buffer->cs, - R_030908_VGT_PRIMITIVE_TYPE, 1, d->vk.ia.primitive_topology); + radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cmd_buffer->cs, R_030908_VGT_PRIMITIVE_TYPE, 1, + d->vk.ia.primitive_topology); } else { - radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE, - d->vk.ia.primitive_topology); + radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE, d->vk.ia.primitive_topology); } if (loc->sgpr_idx == -1) @@ -2269,8 +2191,7 @@ radv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer) base_reg = last_vgt_shader->info.user_data_0; radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, - si_conv_prim_to_gs_out(d->vk.ia.primitive_topology, - last_vgt_shader->info.is_ngg) + 1); + si_conv_prim_to_gs_out(d->vk.ia.primitive_topology, last_vgt_shader->info.is_ngg) + 1); } static void @@ -2278,16 +2199,15 @@ radv_emit_depth_control(struct radv_cmd_buffer *cmd_buffer) { struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; - radeon_set_context_reg( - cmd_buffer->cs, R_028800_DB_DEPTH_CONTROL, - S_028800_Z_ENABLE(d->vk.ds.depth.test_enable ? 1 : 0) | - S_028800_Z_WRITE_ENABLE(d->vk.ds.depth.write_enable ? 1 : 0) | - S_028800_ZFUNC(d->vk.ds.depth.compare_op) | - S_028800_DEPTH_BOUNDS_ENABLE(d->vk.ds.depth.bounds_test.enable ? 1 : 0) | - S_028800_STENCIL_ENABLE(d->vk.ds.stencil.test_enable ? 1 : 0) | - S_028800_BACKFACE_ENABLE(d->vk.ds.stencil.test_enable ? 
1 : 0) | - S_028800_STENCILFUNC(d->vk.ds.stencil.front.op.compare) | - S_028800_STENCILFUNC_BF(d->vk.ds.stencil.back.op.compare)); + radeon_set_context_reg(cmd_buffer->cs, R_028800_DB_DEPTH_CONTROL, + S_028800_Z_ENABLE(d->vk.ds.depth.test_enable ? 1 : 0) | + S_028800_Z_WRITE_ENABLE(d->vk.ds.depth.write_enable ? 1 : 0) | + S_028800_ZFUNC(d->vk.ds.depth.compare_op) | + S_028800_DEPTH_BOUNDS_ENABLE(d->vk.ds.depth.bounds_test.enable ? 1 : 0) | + S_028800_STENCIL_ENABLE(d->vk.ds.stencil.test_enable ? 1 : 0) | + S_028800_BACKFACE_ENABLE(d->vk.ds.stencil.test_enable ? 1 : 0) | + S_028800_STENCILFUNC(d->vk.ds.stencil.front.op.compare) | + S_028800_STENCILFUNC_BF(d->vk.ds.stencil.back.op.compare)); } static void @@ -2295,14 +2215,13 @@ radv_emit_stencil_control(struct radv_cmd_buffer *cmd_buffer) { const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; - radeon_set_context_reg( - cmd_buffer->cs, R_02842C_DB_STENCIL_CONTROL, - S_02842C_STENCILFAIL(si_translate_stencil_op(d->vk.ds.stencil.front.op.fail)) | - S_02842C_STENCILZPASS(si_translate_stencil_op(d->vk.ds.stencil.front.op.pass)) | - S_02842C_STENCILZFAIL(si_translate_stencil_op(d->vk.ds.stencil.front.op.depth_fail)) | - S_02842C_STENCILFAIL_BF(si_translate_stencil_op(d->vk.ds.stencil.back.op.fail)) | - S_02842C_STENCILZPASS_BF(si_translate_stencil_op(d->vk.ds.stencil.back.op.pass)) | - S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(d->vk.ds.stencil.back.op.depth_fail))); + radeon_set_context_reg(cmd_buffer->cs, R_02842C_DB_STENCIL_CONTROL, + S_02842C_STENCILFAIL(si_translate_stencil_op(d->vk.ds.stencil.front.op.fail)) | + S_02842C_STENCILZPASS(si_translate_stencil_op(d->vk.ds.stencil.front.op.pass)) | + S_02842C_STENCILZFAIL(si_translate_stencil_op(d->vk.ds.stencil.front.op.depth_fail)) | + S_02842C_STENCILFAIL_BF(si_translate_stencil_op(d->vk.ds.stencil.back.op.fail)) | + S_02842C_STENCILZPASS_BF(si_translate_stencil_op(d->vk.ds.stencil.back.op.pass)) | + S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(d->vk.ds.stencil.back.op.depth_fail))); } static bool @@ -2312,8 +2231,8 @@ radv_should_force_vrs1x1(struct radv_cmd_buffer *cmd_buffer) const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT]; return pdevice->rad_info.gfx_level >= GFX10_3 && - (cmd_buffer->state.ms.sample_shading_enable || (ps && ps->info.ps.reads_sample_mask_in && - !ps->info.ps.needs_poly_line_smooth)); + (cmd_buffer->state.ms.sample_shading_enable || + (ps && ps->info.ps.reads_sample_mask_in && !ps->info.ps.needs_poly_line_smooth)); } static void @@ -2324,8 +2243,8 @@ radv_emit_fragment_shading_rate(struct radv_cmd_buffer *cmd_buffer) /* When per-vertex VRS is forced and the dynamic fragment shading rate is a no-op, ignore * it. This is needed for vkd3d-proton because it always declares per-draw VRS as dynamic. */ - if (cmd_buffer->device->force_vrs != RADV_FORCE_VRS_1x1 && - d->vk.fsr.fragment_size.width == 1 && d->vk.fsr.fragment_size.height == 1 && + if (cmd_buffer->device->force_vrs != RADV_FORCE_VRS_1x1 && d->vk.fsr.fragment_size.width == 1 && + d->vk.fsr.fragment_size.height == 1 && d->vk.fsr.combiner_ops[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR && d->vk.fsr.combiner_ops[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR) return; @@ -2367,8 +2286,7 @@ radv_emit_fragment_shading_rate(struct radv_cmd_buffer *cmd_buffer) } /* Emit per-draw VRS rate which is the first combiner. 
*/ - radeon_set_uconfig_reg(cmd_buffer->cs, R_03098C_GE_VRS_RATE, - S_03098C_RATE_X(rate_x) | S_03098C_RATE_Y(rate_y)); + radeon_set_uconfig_reg(cmd_buffer->cs, R_03098C_GE_VRS_RATE, S_03098C_RATE_X(rate_x) | S_03098C_RATE_Y(rate_y)); /* Disable VRS and use the rates from PS_ITER_SAMPLES if: * @@ -2451,12 +2369,11 @@ radv_emit_clipping(struct radv_cmd_buffer *cmd_buffer) const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; bool depth_clip_enable = radv_get_depth_clip_enable(cmd_buffer); - radeon_set_context_reg(cmd_buffer->cs, R_028810_PA_CL_CLIP_CNTL, - S_028810_DX_RASTERIZATION_KILL(d->vk.rs.rasterizer_discard_enable) | - S_028810_ZCLIP_NEAR_DISABLE(!depth_clip_enable) | - S_028810_ZCLIP_FAR_DISABLE(!depth_clip_enable) | - S_028810_DX_CLIP_SPACE_DEF(!d->vk.vp.depth_clip_negative_one_to_one) | - S_028810_DX_LINEAR_ATTR_CLIP_ENA(1)); + radeon_set_context_reg( + cmd_buffer->cs, R_028810_PA_CL_CLIP_CNTL, + S_028810_DX_RASTERIZATION_KILL(d->vk.rs.rasterizer_discard_enable) | + S_028810_ZCLIP_NEAR_DISABLE(!depth_clip_enable) | S_028810_ZCLIP_FAR_DISABLE(!depth_clip_enable) | + S_028810_DX_CLIP_SPACE_DEF(!d->vk.vp.depth_clip_negative_one_to_one) | S_028810_DX_LINEAR_ATTR_CLIP_ENA(1)); } static bool @@ -2495,9 +2412,8 @@ radv_emit_logic_op(struct radv_cmd_buffer *cmd_buffer) /* RB+ doesn't work with dual source blending, logic op and CB_RESOLVE. */ bool mrt0_is_dual_src = radv_is_mrt0_dual_src(cmd_buffer); - cb_color_control |= - S_028808_DISABLE_DUAL_QUAD(mrt0_is_dual_src || d->vk.cb.logic_op_enable || - cmd_buffer->state.custom_blend_mode == V_028808_CB_RESOLVE); + cb_color_control |= S_028808_DISABLE_DUAL_QUAD(mrt0_is_dual_src || d->vk.cb.logic_op_enable || + cmd_buffer->state.custom_blend_mode == V_028808_CB_RESOLVE); } if (cmd_buffer->state.custom_blend_mode) { @@ -2530,7 +2446,7 @@ radv_emit_color_write(struct radv_cmd_buffer *cmd_buffer) const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; uint32_t color_write_enable = 0, color_write_mask = 0; - u_foreach_bit(i, d->vk.cb.color_write_enables) { + u_foreach_bit (i, d->vk.cb.color_write_enables) { color_write_enable |= 0xfu << (i * 4); } @@ -2544,8 +2460,7 @@ radv_emit_color_write(struct radv_cmd_buffer *cmd_buffer) radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); } - radeon_set_context_reg(cmd_buffer->cs, R_028238_CB_TARGET_MASK, - color_write_mask & color_write_enable); + radeon_set_context_reg(cmd_buffer->cs, R_028238_CB_TARGET_MASK, color_write_mask & color_write_enable); } static void @@ -2562,16 +2477,15 @@ radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.graphics_pipeline->dynamic_states & RADV_DYNAMIC_PATCH_CONTROL_POINTS) { /* Compute the number of patches. */ cmd_buffer->state.tess_num_patches = get_tcs_num_patches( - d->vk.ts.patch_control_points, tcs->info.tcs.tcs_vertices_out, - tcs->info.tcs.num_linked_inputs, tcs->info.tcs.num_linked_outputs, - tcs->info.tcs.num_linked_patch_outputs, pdevice->hs.tess_offchip_block_dw_size, - pdevice->rad_info.gfx_level, pdevice->rad_info.family); + d->vk.ts.patch_control_points, tcs->info.tcs.tcs_vertices_out, tcs->info.tcs.num_linked_inputs, + tcs->info.tcs.num_linked_outputs, tcs->info.tcs.num_linked_patch_outputs, + pdevice->hs.tess_offchip_block_dw_size, pdevice->rad_info.gfx_level, pdevice->rad_info.family); /* Compute the LDS size. 
*/ cmd_buffer->state.tess_lds_size = calculate_tess_lds_size( pdevice->rad_info.gfx_level, d->vk.ts.patch_control_points, tcs->info.tcs.tcs_vertices_out, - tcs->info.tcs.num_linked_inputs, cmd_buffer->state.tess_num_patches, - tcs->info.tcs.num_linked_outputs, tcs->info.tcs.num_linked_patch_outputs); + tcs->info.tcs.num_linked_inputs, cmd_buffer->state.tess_num_patches, tcs->info.tcs.num_linked_outputs, + tcs->info.tcs.num_linked_patch_outputs); } ls_hs_config = S_028B58_NUM_PATCHES(cmd_buffer->state.tess_num_patches) | @@ -2615,14 +2529,13 @@ radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer) base_reg = cmd_buffer->state.shaders[MESA_SHADER_TESS_CTRL]->info.user_data_0; radeon_set_sh_reg(cmd_buffer->cs, base_reg + offchip->sgpr_idx * 4, tcs_offchip_layout); - const struct radv_userdata_info *num_patches = radv_get_user_sgpr( - radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_TESS_EVAL), AC_UD_TES_NUM_PATCHES); + const struct radv_userdata_info *num_patches = + radv_get_user_sgpr(radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_TESS_EVAL), AC_UD_TES_NUM_PATCHES); assert(num_patches->sgpr_idx != -1 && num_patches->num_sgprs == 1); const struct radv_shader *tes = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_TESS_EVAL); base_reg = tes->info.user_data_0; - radeon_set_sh_reg(cmd_buffer->cs, base_reg + num_patches->sgpr_idx * 4, - cmd_buffer->state.tess_num_patches); + radeon_set_sh_reg(cmd_buffer->cs, base_reg + num_patches->sgpr_idx * 4, cmd_buffer->state.tess_num_patches); } static void @@ -2638,25 +2551,22 @@ radv_emit_conservative_rast_mode(struct radv_cmd_buffer *cmd_buffer) const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT]; const bool uses_inner_coverage = ps && ps->info.ps.reads_fully_covered; - pa_sc_conservative_rast = S_028C4C_PREZ_AA_MASK_ENABLE(1) | S_028C4C_POSTZ_AA_MASK_ENABLE(1) | - S_028C4C_CENTROID_SAMPLE_OVERRIDE(1); + pa_sc_conservative_rast = + S_028C4C_PREZ_AA_MASK_ENABLE(1) | S_028C4C_POSTZ_AA_MASK_ENABLE(1) | S_028C4C_CENTROID_SAMPLE_OVERRIDE(1); /* Inner coverage requires underestimate conservative rasterization. 
*/ if (d->vk.rs.conservative_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT && !uses_inner_coverage) { - pa_sc_conservative_rast |= S_028C4C_OVER_RAST_ENABLE(1) | - S_028C4C_UNDER_RAST_SAMPLE_SELECT(1) | + pa_sc_conservative_rast |= S_028C4C_OVER_RAST_ENABLE(1) | S_028C4C_UNDER_RAST_SAMPLE_SELECT(1) | S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(1); } else { - pa_sc_conservative_rast |= - S_028C4C_OVER_RAST_SAMPLE_SELECT(1) | S_028C4C_UNDER_RAST_ENABLE(1); + pa_sc_conservative_rast |= S_028C4C_OVER_RAST_SAMPLE_SELECT(1) | S_028C4C_UNDER_RAST_ENABLE(1); } } else { pa_sc_conservative_rast = S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1); } - radeon_set_context_reg(cmd_buffer->cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, - pa_sc_conservative_rast); + radeon_set_context_reg(cmd_buffer->cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, pa_sc_conservative_rast); } } @@ -2667,8 +2577,8 @@ radv_emit_depth_clamp_enable(struct radv_cmd_buffer *cmd_buffer) radeon_set_context_reg(cmd_buffer->cs, R_02800C_DB_RENDER_OVERRIDE, S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | - S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) | - S_02800C_DISABLE_VIEWPORT_CLAMP(mode == RADV_DEPTH_CLAMP_MODE_DISABLED)); + S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) | + S_02800C_DISABLE_VIEWPORT_CLAMP(mode == RADV_DEPTH_CLAMP_MODE_DISABLED)); } static void @@ -2693,8 +2603,7 @@ radv_emit_rasterization_samples(struct radv_cmd_buffer *cmd_buffer) /* This should only be set when VRS surfaces aren't enabled on GFX11, otherwise the GPU might * hang. */ - S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(pdevice->rad_info.gfx_level < GFX11 || - !cmd_buffer->state.uses_vrs_attachment); + S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(pdevice->rad_info.gfx_level < GFX11 || !cmd_buffer->state.uses_vrs_attachment); if (!d->sample_location.count) radv_emit_default_sample_locations(cmd_buffer->cs, rasterization_samples); @@ -2718,19 +2627,16 @@ radv_emit_rasterization_samples(struct radv_cmd_buffer *cmd_buffer) } static void -radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, - struct radv_color_buffer_info *cb, struct radv_image_view *iview, - VkImageLayout layout) +radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, struct radv_color_buffer_info *cb, + struct radv_image_view *iview, VkImageLayout layout) { bool is_vi = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX8; uint32_t cb_fdcc_control = cb->cb_dcc_control; uint32_t cb_color_info = cb->cb_color_info; struct radv_image *image = iview->image; - if (!radv_layout_dcc_compressed( - cmd_buffer->device, image, iview->vk.base_mip_level, layout, - radv_image_queue_family_mask(image, cmd_buffer->qf, - cmd_buffer->qf))) { + if (!radv_layout_dcc_compressed(cmd_buffer->device, image, iview->vk.base_mip_level, layout, + radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf))) { if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { cb_fdcc_control &= C_028C78_FDCC_ENABLE; } else { @@ -2738,15 +2644,14 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, } } - const enum radv_fmask_compression fmask_comp = - radv_layout_fmask_compression(cmd_buffer->device, image, layout, - radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf)); + const enum radv_fmask_compression fmask_comp = radv_layout_fmask_compression( + cmd_buffer->device, image, layout, radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf)); if (fmask_comp == RADV_FMASK_COMPRESSION_NONE) { cb_color_info &= 
C_028C70_COMPRESSION; } - if (radv_image_is_tc_compat_cmask(image) && (radv_is_fmask_decompress_pipeline(cmd_buffer) || - radv_is_dcc_decompress_pipeline(cmd_buffer))) { + if (radv_image_is_tc_compat_cmask(image) && + (radv_is_fmask_decompress_pipeline(cmd_buffer) || radv_is_dcc_decompress_pipeline(cmd_buffer))) { /* If this bit is set, the FMASK decompression operation * doesn't occur (DCC_COMPRESS also implies FMASK_DECOMPRESS). */ @@ -2755,10 +2660,10 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { radeon_set_context_reg_seq(cmd_buffer->cs, R_028C6C_CB_COLOR0_VIEW + index * 0x3c, 4); - radeon_emit(cmd_buffer->cs, cb->cb_color_view); /* CB_COLOR0_VIEW */ - radeon_emit(cmd_buffer->cs, cb->cb_color_info); /* CB_COLOR0_INFO */ - radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); /* CB_COLOR0_ATTRIB */ - radeon_emit(cmd_buffer->cs, cb_fdcc_control); /* CB_COLOR0_FDCC_CONTROL */ + radeon_emit(cmd_buffer->cs, cb->cb_color_view); /* CB_COLOR0_VIEW */ + radeon_emit(cmd_buffer->cs, cb->cb_color_info); /* CB_COLOR0_INFO */ + radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); /* CB_COLOR0_ATTRIB */ + radeon_emit(cmd_buffer->cs, cb_fdcc_control); /* CB_COLOR0_FDCC_CONTROL */ radeon_set_context_reg(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, cb->cb_color_base); radeon_set_context_reg(cmd_buffer->cs, R_028E40_CB_COLOR0_BASE_EXT + index * 4, cb->cb_color_base >> 32); @@ -2782,18 +2687,12 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, cb->cb_dcc_base); - radeon_set_context_reg(cmd_buffer->cs, R_028E40_CB_COLOR0_BASE_EXT + index * 4, - cb->cb_color_base >> 32); - radeon_set_context_reg(cmd_buffer->cs, R_028E60_CB_COLOR0_CMASK_BASE_EXT + index * 4, - cb->cb_color_cmask >> 32); - radeon_set_context_reg(cmd_buffer->cs, R_028E80_CB_COLOR0_FMASK_BASE_EXT + index * 4, - cb->cb_color_fmask >> 32); - radeon_set_context_reg(cmd_buffer->cs, R_028EA0_CB_COLOR0_DCC_BASE_EXT + index * 4, - cb->cb_dcc_base >> 32); - radeon_set_context_reg(cmd_buffer->cs, R_028EC0_CB_COLOR0_ATTRIB2 + index * 4, - cb->cb_color_attrib2); - radeon_set_context_reg(cmd_buffer->cs, R_028EE0_CB_COLOR0_ATTRIB3 + index * 4, - cb->cb_color_attrib3); + radeon_set_context_reg(cmd_buffer->cs, R_028E40_CB_COLOR0_BASE_EXT + index * 4, cb->cb_color_base >> 32); + radeon_set_context_reg(cmd_buffer->cs, R_028E60_CB_COLOR0_CMASK_BASE_EXT + index * 4, cb->cb_color_cmask >> 32); + radeon_set_context_reg(cmd_buffer->cs, R_028E80_CB_COLOR0_FMASK_BASE_EXT + index * 4, cb->cb_color_fmask >> 32); + radeon_set_context_reg(cmd_buffer->cs, R_028EA0_CB_COLOR0_DCC_BASE_EXT + index * 4, cb->cb_dcc_base >> 32); + radeon_set_context_reg(cmd_buffer->cs, R_028EC0_CB_COLOR0_ATTRIB2 + index * 4, cb->cb_color_attrib2); + radeon_set_context_reg(cmd_buffer->cs, R_028EE0_CB_COLOR0_ATTRIB3 + index * 4, cb->cb_color_attrib3); } else if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) { radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11); radeon_emit(cmd_buffer->cs, cb->cb_color_base); @@ -2812,8 +2711,7 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, radeon_emit(cmd_buffer->cs, cb->cb_dcc_base); radeon_emit(cmd_buffer->cs, S_028C98_BASE_256B(cb->cb_dcc_base >> 32)); - radeon_set_context_reg(cmd_buffer->cs, R_0287A0_CB_MRT0_EPITCH + index * 4, - cb->cb_mrt_epitch); + 
radeon_set_context_reg(cmd_buffer->cs, R_0287A0_CB_MRT0_EPITCH + index * 4, cb->cb_mrt_epitch); } else { radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11); radeon_emit(cmd_buffer->cs, cb->cb_color_base); @@ -2829,14 +2727,12 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, radeon_emit(cmd_buffer->cs, cb->cb_color_fmask_slice); if (is_vi) { /* DCC BASE */ - radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, - cb->cb_dcc_base); + radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, cb->cb_dcc_base); } } - if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11 - ? G_028C78_FDCC_ENABLE(cb_fdcc_control) - : G_028C70_DCC_ENABLE(cb_color_info)) { + if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11 ? G_028C78_FDCC_ENABLE(cb_fdcc_control) + : G_028C70_DCC_ENABLE(cb_color_info)) { /* Drawing with DCC enabled also compresses colorbuffers. */ VkImageSubresourceRange range = { .aspectMask = iview->vk.aspects, @@ -2852,21 +2748,17 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, static void radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds, - const struct radv_image_view *iview, VkImageLayout layout, - bool requires_cond_exec) + const struct radv_image_view *iview, VkImageLayout layout, bool requires_cond_exec) { const struct radv_image *image = iview->image; uint32_t db_z_info = ds->db_z_info; uint32_t db_z_info_reg; - if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug || - !radv_image_is_tc_compat_htile(image)) + if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug || !radv_image_is_tc_compat_htile(image)) return; - if (!radv_layout_is_htile_compressed( - cmd_buffer->device, image, layout, - radv_image_queue_family_mask(image, cmd_buffer->qf, - cmd_buffer->qf))) { + if (!radv_layout_is_htile_compressed(cmd_buffer->device, image, layout, + radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf))) { db_z_info &= C_028040_TILE_SURFACE_ENABLE; } @@ -2896,8 +2788,8 @@ radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_ } static void -radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds, - struct radv_image_view *iview, VkImageLayout layout) +radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds, struct radv_image_view *iview, + VkImageLayout layout) { const struct radv_image *image = iview->image; uint32_t db_z_info = ds->db_z_info; @@ -2905,15 +2797,12 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_ uint32_t db_htile_surface = ds->db_htile_surface; uint32_t db_render_control = ds->db_render_control | cmd_buffer->state.db_render_control; - if (!radv_layout_is_htile_compressed( - cmd_buffer->device, image, layout, - radv_image_queue_family_mask(image, cmd_buffer->qf, - cmd_buffer->qf))) { + if (!radv_layout_is_htile_compressed(cmd_buffer->device, image, layout, + radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf))) { db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(1) | S_028000_STENCIL_COMPRESS_DISABLE(1); } - if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX10_3 && - !cmd_buffer->state.render.vrs_att.iview) { + if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX10_3 && !cmd_buffer->state.render.vrs_att.iview) { db_htile_surface &= 
C_028ABC_VRS_HTILE_ENCODING; } @@ -2952,20 +2841,16 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_ radeon_emit(cmd_buffer->cs, ds->db_depth_size); radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 10); - radeon_emit(cmd_buffer->cs, db_z_info); /* DB_Z_INFO */ - radeon_emit(cmd_buffer->cs, db_stencil_info); /* DB_STENCIL_INFO */ - radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* DB_Z_READ_BASE */ - radeon_emit(cmd_buffer->cs, - S_028044_BASE_HI(ds->db_z_read_base >> 32)); /* DB_Z_READ_BASE_HI */ - radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* DB_STENCIL_READ_BASE */ - radeon_emit(cmd_buffer->cs, - S_02804C_BASE_HI(ds->db_stencil_read_base >> 32)); /* DB_STENCIL_READ_BASE_HI */ - radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* DB_Z_WRITE_BASE */ - radeon_emit(cmd_buffer->cs, - S_028054_BASE_HI(ds->db_z_write_base >> 32)); /* DB_Z_WRITE_BASE_HI */ - radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* DB_STENCIL_WRITE_BASE */ - radeon_emit(cmd_buffer->cs, - S_02805C_BASE_HI(ds->db_stencil_write_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */ + radeon_emit(cmd_buffer->cs, db_z_info); /* DB_Z_INFO */ + radeon_emit(cmd_buffer->cs, db_stencil_info); /* DB_STENCIL_INFO */ + radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* DB_Z_READ_BASE */ + radeon_emit(cmd_buffer->cs, S_028044_BASE_HI(ds->db_z_read_base >> 32)); /* DB_Z_READ_BASE_HI */ + radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* DB_STENCIL_READ_BASE */ + radeon_emit(cmd_buffer->cs, S_02804C_BASE_HI(ds->db_stencil_read_base >> 32)); /* DB_STENCIL_READ_BASE_HI */ + radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* DB_Z_WRITE_BASE */ + radeon_emit(cmd_buffer->cs, S_028054_BASE_HI(ds->db_z_write_base >> 32)); /* DB_Z_WRITE_BASE_HI */ + radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* DB_STENCIL_WRITE_BASE */ + radeon_emit(cmd_buffer->cs, S_02805C_BASE_HI(ds->db_stencil_write_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */ radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_INFO2, 2); radeon_emit(cmd_buffer->cs, ds->db_z_info2); @@ -2988,8 +2873,7 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_ /* Update the ZRANGE_PRECISION value for the TC-compat bug. */ radv_update_zrange_precision(cmd_buffer, ds, iview, layout, true); - radeon_set_context_reg(cmd_buffer->cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, - ds->pa_su_poly_offset_db_fmt_cntl); + radeon_set_context_reg(cmd_buffer->cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, ds->pa_su_poly_offset_db_fmt_cntl); } static void @@ -3013,29 +2897,25 @@ radv_emit_null_ds_state(struct radv_cmd_buffer *cmd_buffer) radeon_set_context_reg_seq(cmd_buffer->cs, R_028040_DB_Z_INFO, 2); } - radeon_emit(cmd_buffer->cs, - S_028040_FORMAT(V_028040_Z_INVALID) | S_028040_NUM_SAMPLES(num_samples)); + radeon_emit(cmd_buffer->cs, S_028040_FORMAT(V_028040_Z_INVALID) | S_028040_NUM_SAMPLES(num_samples)); radeon_emit(cmd_buffer->cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); radeon_set_context_reg(cmd_buffer->cs, R_028000_DB_RENDER_CONTROL, db_render_control); radeon_set_context_reg(cmd_buffer->cs, R_028010_DB_RENDER_OVERRIDE2, S_028010_CENTROID_COMPUTATION_MODE(gfx_level >= GFX10_3)); - } /** * Update the fast clear depth/stencil values if the image is bound as a * depth/stencil buffer. 
*/ static void -radv_update_bound_fast_clear_ds(struct radv_cmd_buffer *cmd_buffer, - const struct radv_image_view *iview, +radv_update_bound_fast_clear_ds(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview, VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects) { const struct radv_image *image = iview->image; struct radeon_cmdbuf *cs = cmd_buffer->cs; - if (cmd_buffer->state.render.ds_att.iview == NULL || - cmd_buffer->state.render.ds_att.iview->image != image) + if (cmd_buffer->state.render.ds_att.iview == NULL || cmd_buffer->state.render.ds_att.iview->image != image) return; if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { @@ -3065,8 +2945,8 @@ radv_update_bound_fast_clear_ds(struct radv_cmd_buffer *cmd_buffer, */ static void radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, - const VkImageSubresourceRange *range, - VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects) + const VkImageSubresourceRange *range, VkClearDepthStencilValue ds_clear_value, + VkImageAspectFlags aspects) { struct radeon_cmdbuf *cs = cmd_buffer->cs; uint32_t level_count = vk_image_subresource_level_count(&image->vk, range); @@ -3099,8 +2979,7 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image } radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, cmd_buffer->state.predicating)); - radeon_emit(cs, - S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); radeon_emit(cs, va); radeon_emit(cs, va >> 32); radeon_emit(cs, value); @@ -3133,8 +3012,7 @@ radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, struct ra } static void -radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, - const struct radv_image_view *iview, +radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview, VkClearDepthStencilValue ds_clear_value) { VkImageSubresourceRange range = { @@ -3158,8 +3036,7 @@ radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, * Update the clear depth/stencil values for this image. 
*/ void -radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, - const struct radv_image_view *iview, +radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview, VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects) { VkImageSubresourceRange range = { @@ -3244,12 +3121,10 @@ radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image * uint32_t level_count = vk_image_subresource_level_count(&image->vk, range); uint32_t count = 2 * level_count; - ASSERTED unsigned cdw_max = - radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4 + count); + ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4 + count); radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0)); - radeon_emit(cmd_buffer->cs, - S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); + radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); radeon_emit(cmd_buffer->cs, va); radeon_emit(cmd_buffer->cs, va >> 32); @@ -3278,12 +3153,10 @@ radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image * assert(radv_dcc_enabled(image, range->baseMipLevel)); - ASSERTED unsigned cdw_max = - radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4 + count); + ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4 + count); radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0)); - radeon_emit(cmd_buffer->cs, - S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); + radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); radeon_emit(cmd_buffer->cs, va); radeon_emit(cmd_buffer->cs, va >> 32); @@ -3299,18 +3172,16 @@ radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image * * Update the fast clear color values if the image is bound as a color buffer. 
*/ static void -radv_update_bound_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, - int cb_idx, uint32_t color_values[2]) +radv_update_bound_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, int cb_idx, + uint32_t color_values[2]) { struct radeon_cmdbuf *cs = cmd_buffer->cs; - if (cb_idx >= cmd_buffer->state.render.color_att_count || - cmd_buffer->state.render.color_att[cb_idx].iview == NULL || + if (cb_idx >= cmd_buffer->state.render.color_att_count || cmd_buffer->state.render.color_att[cb_idx].iview == NULL || cmd_buffer->state.render.color_att[cb_idx].iview->image != image) return; - ASSERTED unsigned cdw_max = - radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4); + ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4); radeon_set_context_reg_seq(cs, R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c, 2); radeon_emit(cs, color_values[0]); @@ -3337,8 +3208,7 @@ radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_im if (radv_image_has_clear_value(image)) { uint64_t va = radv_image_get_fast_clear_va(image, range->baseMipLevel); - ASSERTED unsigned cdw_max = - radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4 + count); + ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4 + count); radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, cmd_buffer->state.predicating)); radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); @@ -3361,8 +3231,7 @@ radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_im * Update the clear color values for this image. */ void -radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, - const struct radv_image_view *iview, int cb_idx, +radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview, int cb_idx, uint32_t color_values[2]) { struct radv_image *image = iview->image; @@ -3391,8 +3260,7 @@ radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, * Load the clear color values from the image's metadata. 
*/ static void -radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *iview, - int cb_idx) +radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *iview, int cb_idx) { struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radv_image *image = iview->image; @@ -3420,8 +3288,7 @@ radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_i radeon_emit(cs, 2); } else { radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | - COPY_DATA_COUNT_SEL); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | COPY_DATA_COUNT_SEL); radeon_emit(cs, va); radeon_emit(cs, va >> 32); radeon_emit(cs, reg >> 2); @@ -3453,8 +3320,7 @@ radv_emit_fb_mip_change_flush(struct radv_cmd_buffer *cmd_buffer) if (!iview) continue; - if ((radv_image_has_CB_metadata(iview->image) || - radv_dcc_enabled(iview->image, iview->vk.base_mip_level) || + if ((radv_image_has_CB_metadata(iview->image) || radv_dcc_enabled(iview->image, iview->vk.base_mip_level) || radv_dcc_enabled(iview->image, cmd_buffer->state.cb_mip[i])) && cmd_buffer->state.cb_mip[i] != iview->vk.base_mip_level) color_mip_changed = true; @@ -3463,8 +3329,7 @@ radv_emit_fb_mip_change_flush(struct radv_cmd_buffer *cmd_buffer) } if (color_mip_changed) { - cmd_buffer->state.flush_bits |= - RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; } } @@ -3488,8 +3353,7 @@ radv_emit_mip_change_flush_default(struct radv_cmd_buffer *cmd_buffer) } if (need_color_mip_flush) { - cmd_buffer->state.flush_bits |= - RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; } memset(cmd_buffer->state.cb_mip, 0, sizeof(cmd_buffer->state.cb_mip)); @@ -3521,11 +3385,10 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) int i; bool disable_constant_encode_ac01 = false; unsigned color_invalid = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11 - ? S_028C70_FORMAT_GFX11(V_028C70_COLOR_INVALID) - : S_028C70_FORMAT_GFX6(V_028C70_COLOR_INVALID); + ? 
S_028C70_FORMAT_GFX11(V_028C70_COLOR_INVALID) + : S_028C70_FORMAT_GFX6(V_028C70_COLOR_INVALID); - ASSERTED unsigned cdw_max = - radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 48 + MAX_RTS * 70); + ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 48 + MAX_RTS * 70); for (i = 0; i < render->color_att_count; ++i) { struct radv_image_view *iview = render->color_att[i].iview; @@ -3539,25 +3402,22 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, iview->image->bindings[0].bo); assert(iview->vk.aspects & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_PLANE_0_BIT | - VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)); + VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)); if (iview->image->disjoint && iview->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT) { for (uint32_t plane_id = 0; plane_id < iview->image->plane_count; plane_id++) { - radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, - iview->image->bindings[plane_id].bo); + radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, iview->image->bindings[plane_id].bo); } } else { uint32_t plane_id = iview->image->disjoint ? iview->plane_id : 0; - radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, - iview->image->bindings[plane_id].bo); + radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, iview->image->bindings[plane_id].bo); } radv_emit_fb_color_state(cmd_buffer, i, &render->color_att[i].cb, iview, layout); radv_load_color_clear_metadata(cmd_buffer, iview, i); - if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9 && - iview->image->dcc_sign_reinterpret) { + if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9 && iview->image->dcc_sign_reinterpret) { /* Disable constant encoding with the clear value of "1" with different DCC signedness * because the hardware will fill "1" instead of the clear value. */ @@ -3576,17 +3436,15 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) radv_emit_fb_ds_state(cmd_buffer, &render->ds_att.ds, iview, layout); - if (radv_layout_is_htile_compressed( - cmd_buffer->device, iview->image, layout, - radv_image_queue_family_mask(iview->image, cmd_buffer->qf, - cmd_buffer->qf))) { + if (radv_layout_is_htile_compressed(cmd_buffer->device, iview->image, layout, + radv_image_queue_family_mask(iview->image, cmd_buffer->qf, cmd_buffer->qf))) { /* Only load the depth/stencil fast clear values when * compressed rendering is enabled. */ radv_load_ds_clear_metadata(cmd_buffer, iview); } - } else if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX10_3 && - render->vrs_att.iview && radv_cmd_buffer_get_vrs_image(cmd_buffer)) { + } else if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX10_3 && render->vrs_att.iview && + radv_cmd_buffer_get_vrs_image(cmd_buffer)) { /* When a subpass uses a VRS attachment without binding a depth/stencil attachment, we have to * bind our internal depth buffer that contains the VRS data as part of HTILE. 
*/ @@ -3649,21 +3507,19 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) } if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX8) { - bool disable_constant_encode = - cmd_buffer->device->physical_device->rad_info.has_dcc_constant_encode; + bool disable_constant_encode = cmd_buffer->device->physical_device->rad_info.has_dcc_constant_encode; enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level; if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { - radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_FDCC_CONTROL, - S_028424_SAMPLE_MASK_TRACKER_WATERMARK(0)); + radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_FDCC_CONTROL, S_028424_SAMPLE_MASK_TRACKER_WATERMARK(0)); } else { - uint8_t watermark = gfx_level >= GFX10 ? 6 : 4; + uint8_t watermark = gfx_level >= GFX10 ? 6 : 4; radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_DCC_CONTROL, S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(gfx_level <= GFX9) | - S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) | - S_028424_DISABLE_CONSTANT_ENCODE_AC01(disable_constant_encode_ac01) | - S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode)); + S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) | + S_028424_DISABLE_CONSTANT_ENCODE_AC01(disable_constant_encode_ac01) | + S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode)); } } @@ -3678,8 +3534,8 @@ radv_emit_guardband_state(struct radv_cmd_buffer *cmd_buffer) const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; unsigned rast_prim = radv_get_rasterization_prim(cmd_buffer); - si_write_guardband(cmd_buffer->cs, d->vk.vp.viewport_count, d->vk.vp.viewports, rast_prim, - d->vk.rs.polygon_mode, d->vk.rs.line.width); + si_write_guardband(cmd_buffer->cs, d->vk.vp.viewport_count, d->vk.vp.viewports, rast_prim, d->vk.rs.polygon_mode, + d->vk.rs.line.width); cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_GUARDBAND; } @@ -3695,8 +3551,7 @@ radv_emit_index_buffer(struct radv_cmd_buffer *cmd_buffer) if (state->index_type < 0) return; - if (state->max_index_count || - !cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug) { + if (state->max_index_count || !cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug) { radeon_emit(cs, PKT3(PKT3_INDEX_BASE, 1, 0)); radeon_emit(cs, state->index_va); radeon_emit(cs, state->index_va >> 32); @@ -3712,8 +3567,8 @@ static void radv_flush_occlusion_query_state(struct radv_cmd_buffer *cmd_buffer) { const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level; - const bool enable_occlusion_queries = cmd_buffer->state.active_occlusion_queries || - cmd_buffer->state.inherited_occlusion_queries; + const bool enable_occlusion_queries = + cmd_buffer->state.active_occlusion_queries || cmd_buffer->state.inherited_occlusion_queries; uint32_t db_count_control; if (!enable_occlusion_queries) { @@ -3721,9 +3576,8 @@ radv_flush_occlusion_query_state(struct radv_cmd_buffer *cmd_buffer) } else { uint32_t sample_rate = util_logbase2(cmd_buffer->state.render.max_samples); bool gfx10_perfect = - gfx_level >= GFX10 && - (cmd_buffer->state.perfect_occlusion_queries_enabled || - cmd_buffer->state.inherited_query_control_flags & VK_QUERY_CONTROL_PRECISE_BIT); + gfx_level >= GFX10 && (cmd_buffer->state.perfect_occlusion_queries_enabled || + cmd_buffer->state.inherited_query_control_flags & VK_QUERY_CONTROL_PRECISE_BIT); if (gfx_level >= GFX7) { /* Always enable PERFECT_ZPASS_COUNTS due to issues with partially @@ -3760,8 +3614,7 @@ 
radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_ */ /* From total number of attributes to offset. */ - static const uint16_t total_to_offset[16] = {0, 1, 4, 10, 20, 35, 56, 84, - 120, 165, 220, 286, 364, 455, 560, 680}; + static const uint16_t total_to_offset[16] = {0, 1, 4, 10, 20, 35, 56, 84, 120, 165, 220, 286, 364, 455, 560, 680}; unsigned start_index = total_to_offset[num_attributes - 1]; /* From number of instanced attributes to offset. This would require a different LUT depending on @@ -3771,8 +3624,7 @@ radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_ static const uint8_t count_to_offset_total16[16] = {0, 16, 31, 45, 58, 70, 81, 91, 100, 108, 115, 121, 126, 130, 133, 135}; unsigned count = util_bitcount(instance_rate_inputs); - unsigned offset_from_start_index = - count_to_offset_total16[count - 1] - ((16 - num_attributes) * (count - 1)); + unsigned offset_from_start_index = count_to_offset_total16[count - 1] - ((16 - num_attributes) * (count - 1)); unsigned first = ffs(instance_rate_inputs) - 1; return start_index + offset_from_start_index + first; @@ -3822,8 +3674,7 @@ radv_cmp_vs_prolog(const void *a_, const void *b_) } static struct radv_shader_part * -lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs_shader, - uint32_t *nontrivial_divisors) +lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs_shader, uint32_t *nontrivial_divisors) { STATIC_ASSERT(sizeof(union vs_prolog_key_header) == 4); assert(vs_shader->info.vs.dynamic_inputs); @@ -3852,8 +3703,7 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v uint64_t vb_offset = cmd_buffer->vertex_bindings[binding].offset; uint64_t vb_stride; - if (pipeline->dynamic_states & (RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE | - RADV_DYNAMIC_VERTEX_INPUT)) { + if (pipeline->dynamic_states & (RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE | RADV_DYNAMIC_VERTEX_INPUT)) { vb_stride = cmd_buffer->vertex_bindings[binding].stride; } else { vb_stride = pipeline->binding_stride[binding]; @@ -3870,15 +3720,13 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v const bool can_use_simple_input = cmd_buffer->state.shaders[MESA_SHADER_VERTEX] && - cmd_buffer->state.shaders[MESA_SHADER_VERTEX]->info.is_ngg == - device->physical_device->use_ngg && - cmd_buffer->state.shaders[MESA_SHADER_VERTEX]->info.wave_size == - device->physical_device->ge_wave_size; + cmd_buffer->state.shaders[MESA_SHADER_VERTEX]->info.is_ngg == device->physical_device->use_ngg && + cmd_buffer->state.shaders[MESA_SHADER_VERTEX]->info.wave_size == device->physical_device->ge_wave_size; /* try to use a pre-compiled prolog first */ struct radv_shader_part *prolog = NULL; - if (can_use_simple_input && (!vs_shader->info.vs.as_ls || !instance_rate_inputs) && - !misaligned_mask && !state->alpha_adjust_lo && !state->alpha_adjust_hi) { + if (can_use_simple_input && (!vs_shader->info.vs.as_ls || !instance_rate_inputs) && !misaligned_mask && + !state->alpha_adjust_lo && !state->alpha_adjust_hi) { if (!instance_rate_inputs) { prolog = device->simple_vs_prologs[num_attributes - 1]; } else if (num_attributes <= 16 && !*nontrivial_divisors && !zero_divisors && @@ -3931,7 +3779,8 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v uint8_t *formats = (uint8_t *)&key_words[key_size]; unsigned num_formats = 0; - u_foreach_bit(index, misaligned_mask) formats[num_formats++] = 
state->formats[index]; + u_foreach_bit (index, misaligned_mask) + formats[num_formats++] = state->formats[index]; while (num_formats & 0x3) formats[num_formats++] = 0; key_size += num_formats / 4u; @@ -3955,14 +3804,12 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v uint32_t hash = radv_hash_vs_prolog(key_words); - if (cmd_buffer->state.emitted_vs_prolog && - cmd_buffer->state.emitted_vs_prolog_key_hash == hash && + if (cmd_buffer->state.emitted_vs_prolog && cmd_buffer->state.emitted_vs_prolog_key_hash == hash && radv_cmp_vs_prolog(key_words, cmd_buffer->state.emitted_vs_prolog_key)) return cmd_buffer->state.emitted_vs_prolog; u_rwlock_rdlock(&device->vs_prologs_lock); - struct hash_entry *prolog_entry = - _mesa_hash_table_search_pre_hashed(device->vs_prologs, hash, key_words); + struct hash_entry *prolog_entry = _mesa_hash_table_search_pre_hashed(device->vs_prologs, hash, key_words); u_rwlock_rdunlock(&device->vs_prologs_lock); if (!prolog_entry) { @@ -4060,8 +3907,7 @@ emit_prolog_inputs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *(inputs++) = input_va; *(inputs++) = input_va >> 32; - u_foreach_bit(index, nontrivial_divisors) - { + u_foreach_bit (index, nontrivial_divisors) { uint32_t div = state->divisors[index]; if (div == 0) { *(inputs++) = 0; @@ -4079,20 +3925,17 @@ emit_prolog_inputs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader input_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + inputs_offset; } - const struct radv_userdata_info *loc = - &vs_shader->info.user_sgprs_locs.shader_data[AC_UD_VS_PROLOG_INPUTS]; + const struct radv_userdata_info *loc = &vs_shader->info.user_sgprs_locs.shader_data[AC_UD_VS_PROLOG_INPUTS]; uint32_t base_reg = vs_shader->info.user_data_0; assert(loc->sgpr_idx != -1); assert(loc->num_sgprs == 2); - radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, - input_va, true); + radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, input_va, true); } static void radv_emit_vertex_input(struct radv_cmd_buffer *cmd_buffer) { - const struct radv_shader *vs_shader = - radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_VERTEX); + const struct radv_shader *vs_shader = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_VERTEX); assert(!cmd_buffer->state.mesh_shading); @@ -4100,8 +3943,7 @@ radv_emit_vertex_input(struct radv_cmd_buffer *cmd_buffer) return; uint32_t nontrivial_divisors; - struct radv_shader_part *prolog = - lookup_vs_prolog(cmd_buffer, vs_shader, &nontrivial_divisors); + struct radv_shader_part *prolog = lookup_vs_prolog(cmd_buffer, vs_shader, &nontrivial_divisors); if (!prolog) { vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY); return; @@ -4178,8 +4020,7 @@ radv_emit_tess_domain_origin(struct radv_cmd_buffer *cmd_buffer) } radeon_set_context_reg(cmd_buffer->cs, R_028B6C_VGT_TF_PARAM, - S_028B6C_TYPE(type) | S_028B6C_PARTITIONING(partitioning) | - S_028B6C_TOPOLOGY(topology) | + S_028B6C_TYPE(type) | S_028B6C_PARTITIONING(partitioning) | S_028B6C_TOPOLOGY(topology) | S_028B6C_DISTRIBUTION_MODE(distribution_mode)); } @@ -4255,14 +4096,11 @@ radv_emit_color_blend(struct radv_cmd_buffer *cmd_buffer) * First, get rid of DST in the blend factors: * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) */ - si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, VK_BLEND_FACTOR_DST_COLOR, - VK_BLEND_FACTOR_SRC_COLOR); + si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, 
VK_BLEND_FACTOR_DST_COLOR, VK_BLEND_FACTOR_SRC_COLOR); - si_blend_remove_dst(&eqA, &srcA, &dstA, VK_BLEND_FACTOR_DST_COLOR, - VK_BLEND_FACTOR_SRC_COLOR); + si_blend_remove_dst(&eqA, &srcA, &dstA, VK_BLEND_FACTOR_DST_COLOR, VK_BLEND_FACTOR_SRC_COLOR); - si_blend_remove_dst(&eqA, &srcA, &dstA, VK_BLEND_FACTOR_DST_ALPHA, - VK_BLEND_FACTOR_SRC_ALPHA); + si_blend_remove_dst(&eqA, &srcA, &dstA, VK_BLEND_FACTOR_DST_ALPHA, VK_BLEND_FACTOR_SRC_ALPHA); /* Look up the ideal settings from tables. */ srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false); @@ -4282,11 +4120,10 @@ radv_emit_color_blend(struct radv_cmd_buffer *cmd_buffer) dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; /* Set the final value. */ - sx_mrt_blend_opt[i] = - S_028760_COLOR_SRC_OPT(srcRGB_opt) | S_028760_COLOR_DST_OPT(dstRGB_opt) | - S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) | - S_028760_ALPHA_SRC_OPT(srcA_opt) | S_028760_ALPHA_DST_OPT(dstA_opt) | - S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA)); + sx_mrt_blend_opt[i] = S_028760_COLOR_SRC_OPT(srcRGB_opt) | S_028760_COLOR_DST_OPT(dstRGB_opt) | + S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) | + S_028760_ALPHA_SRC_OPT(srcA_opt) | S_028760_ALPHA_DST_OPT(dstA_opt) | + S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA)); blend_cntl |= S_028780_ENABLE(1); blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); @@ -4305,15 +4142,15 @@ radv_emit_color_blend(struct radv_cmd_buffer *cmd_buffer) /* Disable RB+ blend optimizations for dual source blending. */ if (mrt0_is_dual_src) { for (unsigned i = 0; i < MAX_RTS; i++) { - sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | - S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE); + sx_mrt_blend_opt[i] = + S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE); } } /* Disable RB+ blend optimizations on GFX11 when alpha-to-coverage is enabled. 
*/ if (gfx_level >= GFX11 && d->vk.ms.alpha_to_coverage_enable) { - sx_mrt_blend_opt[0] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | - S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE); + sx_mrt_blend_opt[0] = + S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE); } } @@ -4366,10 +4203,8 @@ lookup_ps_epilog(struct radv_cmd_buffer *cmd_buffer) radv_normalize_blend_factor(eqRGB, &srcRGB, &dstRGB); if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA || - srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || - dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || - srcRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA || - dstRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA) + srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || + srcRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA || dstRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA) state.need_src_alpha |= 1 << i; } @@ -4384,8 +4219,7 @@ lookup_ps_epilog(struct radv_cmd_buffer *cmd_buffer) uint32_t hash = radv_hash_ps_epilog(&key); u_rwlock_rdlock(&device->ps_epilogs_lock); - struct hash_entry *epilog_entry = - _mesa_hash_table_search_pre_hashed(device->ps_epilogs, hash, &key); + struct hash_entry *epilog_entry = _mesa_hash_table_search_pre_hashed(device->ps_epilogs, hash, &key); u_rwlock_rdunlock(&device->ps_epilogs_lock); if (!epilog_entry) { @@ -4463,17 +4297,13 @@ radv_emit_msaa_state(struct radv_cmd_buffer *cmd_buffer) unsigned ps_iter_samples = radv_get_ps_iter_samples(cmd_buffer); unsigned log_z_samples = util_logbase2(z_samples); unsigned log_ps_iter_samples = util_logbase2(ps_iter_samples); - bool uses_underestimate = - d->vk.rs.conservative_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT; + bool uses_underestimate = d->vk.rs.conservative_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT; - db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) | - S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) | - S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) | - S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples); + db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) | S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) | + S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) | S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples); pa_sc_aa_config |= S_028BE0_MSAA_NUM_SAMPLES(uses_underestimate ? 
0 : log_samples) | - S_028BE0_MAX_SAMPLE_DIST(max_sample_dist) | - S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) | + S_028BE0_MAX_SAMPLE_DIST(max_sample_dist) | S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) | S_028BE0_COVERED_CENTROID_IS_CENTER(pdevice->rad_info.gfx_level >= GFX10_3); if (d->vk.rs.line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT) @@ -4493,16 +4323,14 @@ radv_emit_msaa_state(struct radv_cmd_buffer *cmd_buffer) if (pdevice->rad_info.gfx_level == GFX11 && render->ds_att.format == VK_FORMAT_UNDEFINED) { assert(!render->ds_att.iview); radeon_set_context_reg(cmd_buffer->cs, R_028040_DB_Z_INFO, - S_028040_FORMAT(V_028040_Z_INVALID) | - S_028040_NUM_SAMPLES(log_samples)); + S_028040_FORMAT(V_028040_Z_INVALID) | S_028040_NUM_SAMPLES(log_samples)); } radeon_set_context_reg(cmd_buffer->cs, R_028804_DB_EQAA, db_eqaa); radeon_set_context_reg(cmd_buffer->cs, R_028BE0_PA_SC_AA_CONFIG, pa_sc_aa_config); - radeon_set_context_reg(cmd_buffer->cs, R_028A48_PA_SC_MODE_CNTL_0, - S_028A48_ALTERNATE_RBS_PER_TILE(pdevice->rad_info.gfx_level >= GFX9) | - S_028A48_VPORT_SCISSOR_ENABLE(1) | - S_028A48_LINE_STIPPLE_ENABLE(d->vk.rs.line.stipple.enable) | - S_028A48_MSAA_ENABLE(rasterization_samples > 1)); + radeon_set_context_reg( + cmd_buffer->cs, R_028A48_PA_SC_MODE_CNTL_0, + S_028A48_ALTERNATE_RBS_PER_TILE(pdevice->rad_info.gfx_level >= GFX9) | S_028A48_VPORT_SCISSOR_ENABLE(1) | + S_028A48_LINE_STIPPLE_ENABLE(d->vk.rs.line.stipple.enable) | S_028A48_MSAA_ENABLE(rasterization_samples > 1)); } static void @@ -4513,9 +4341,9 @@ radv_emit_line_rasterization_mode(struct radv_cmd_buffer *cmd_buffer) /* The DX10 diamond test is unnecessary with Vulkan and it decreases line rasterization * performance. */ - radeon_set_context_reg(cmd_buffer->cs, R_028BDC_PA_SC_LINE_CNTL, - S_028BDC_PERPENDICULAR_ENDCAP_ENA( - d->vk.rs.line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT)); + radeon_set_context_reg( + cmd_buffer->cs, R_028BDC_PA_SC_LINE_CNTL, + S_028BDC_PERPENDICULAR_ENDCAP_ENA(d->vk.rs.line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT)); } static bool @@ -4558,10 +4386,8 @@ radv_emit_attachment_feedback_loop_enable(struct radv_cmd_buffer *cmd_buffer) static void radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const uint64_t states) { - if (states & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT | - RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE | - RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLAMP_ENABLE)) + if (states & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT | RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_ENABLE | + RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLAMP_ENABLE)) radv_emit_viewport(cmd_buffer); if (states & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT) && @@ -4574,9 +4400,8 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const ui if (states & RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) radv_emit_blend_constants(cmd_buffer); - if (states & - (RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | - RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK)) + if (states & (RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | + RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK)) radv_emit_stencil(cmd_buffer); if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS) @@ -4585,8 +4410,7 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const ui if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS) 
radv_emit_depth_bias(cmd_buffer); - if (states & (RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE | - RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE_ENABLE | + if (states & (RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE | RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE_MODE)) radv_emit_discard_rectangle(cmd_buffer); @@ -4601,21 +4425,18 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const ui if (states & (RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE | RADV_CMD_DIRTY_DYNAMIC_POLYGON_MODE | - RADV_CMD_DIRTY_DYNAMIC_PROVOKING_VERTEX_MODE | - RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE)) + RADV_CMD_DIRTY_DYNAMIC_PROVOKING_VERTEX_MODE | RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE)) radv_emit_culling(cmd_buffer); - if (states & (RADV_CMD_DIRTY_DYNAMIC_PROVOKING_VERTEX_MODE | - RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY)) + if (states & (RADV_CMD_DIRTY_DYNAMIC_PROVOKING_VERTEX_MODE | RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY)) radv_emit_provoking_vertex_mode(cmd_buffer); if (states & RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) radv_emit_primitive_topology(cmd_buffer); - if (states & - (RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP)) + if (states & (RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE | + RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE | + RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP)) radv_emit_depth_control(cmd_buffer); if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP) @@ -4627,20 +4448,16 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const ui if (states & RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE) radv_emit_primitive_restart_enable(cmd_buffer); - if (states & (RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE | - RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLAMP_ENABLE)) + if (states & (RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_ENABLE | + RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLAMP_ENABLE)) radv_emit_clipping(cmd_buffer); if (states & (RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP | RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK | - RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE | + RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK | RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE | RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_EQUATION)) radv_emit_logic_op(cmd_buffer); - if (states & (RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK)) + if (states & (RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE | RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK)) radv_emit_color_write(cmd_buffer); if (states & RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT) @@ -4658,46 +4475,38 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const ui if (states & RADV_CMD_DIRTY_DYNAMIC_SAMPLE_MASK) radv_emit_sample_mask(cmd_buffer); - if (states & (RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLAMP_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_ENABLE)) + if (states & (RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLAMP_ENABLE | 
RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_ENABLE)) radv_emit_depth_clamp_enable(cmd_buffer); - if (states & (RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK | - RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_EQUATION | - RADV_CMD_DIRTY_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE)) + if (states & (RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE | RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK | + RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_EQUATION | RADV_CMD_DIRTY_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE)) radv_emit_color_blend(cmd_buffer); if (states & RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE) radv_emit_line_rasterization_mode(cmd_buffer); - if (states & (RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES | - RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE)) + if (states & (RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES | RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE)) radv_emit_rasterization_samples(cmd_buffer); - if (states & (RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_CONSERVATIVE_RAST_MODE | - RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS | - RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES | + if (states & (RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE_ENABLE | RADV_CMD_DIRTY_DYNAMIC_CONSERVATIVE_RAST_MODE | + RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS | RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES | RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE)) radv_emit_msaa_state(cmd_buffer); - if (states & (RADV_CMD_DIRTY_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE)) + if (states & + (RADV_CMD_DIRTY_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE | RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE)) radv_emit_attachment_feedback_loop_enable(cmd_buffer); cmd_buffer->state.dirty &= ~states; } static void -radv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer, - struct radv_descriptor_state *descriptors_state) +radv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_descriptor_state *descriptors_state) { struct radv_descriptor_set *set = (struct radv_descriptor_set *)&descriptors_state->push_set.set; unsigned bo_offset; - if (!radv_cmd_buffer_upload_data(cmd_buffer, set->header.size, set->header.mapped_ptr, - &bo_offset)) + if (!radv_cmd_buffer_upload_data(cmd_buffer, set->header.size, set->header.mapped_ptr, &bo_offset)) return; set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); @@ -4705,11 +4514,9 @@ radv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer, } static void -radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer, - VkPipelineBindPoint bind_point) +radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point) { - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, bind_point); + struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point); uint32_t size = MAX_SETS * 4; uint32_t offset; void *ptr; @@ -4731,15 +4538,14 @@ radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer, uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); va += offset; - ASSERTED unsigned cdw_max = - radeon_check_space(device->ws, cs, MESA_VULKAN_SHADER_STAGES * 3); + ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, MESA_VULKAN_SHADER_STAGES * 3); if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { for (unsigned s = MESA_SHADER_VERTEX; s <= MESA_SHADER_FRAGMENT; s++) if (radv_cmdbuf_has_stage(cmd_buffer, s)) radv_emit_userdata_address(device, cs, 
cmd_buffer->state.shaders[s], - cmd_buffer->state.shaders[s]->info.user_data_0, - AC_UD_INDIRECT_DESCRIPTOR_SETS, va); + cmd_buffer->state.shaders[s]->info.user_data_0, AC_UD_INDIRECT_DESCRIPTOR_SETS, + va); if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_MESH)) radv_emit_userdata_address(device, cs, cmd_buffer->state.shaders[MESA_SHADER_MESH], @@ -4748,8 +4554,7 @@ radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer, if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) { radeon_check_space(device->ws, cmd_buffer->gang.cs, 3); - radv_emit_userdata_address(device, cmd_buffer->gang.cs, - cmd_buffer->state.shaders[MESA_SHADER_TASK], + radv_emit_userdata_address(device, cmd_buffer->gang.cs, cmd_buffer->state.shaders[MESA_SHADER_TASK], cmd_buffer->state.shaders[MESA_SHADER_TASK]->info.user_data_0, AC_UD_INDIRECT_DESCRIPTOR_SETS, va); } @@ -4766,11 +4571,9 @@ radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer, } ALWAYS_INLINE static void -radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages, - VkPipelineBindPoint bind_point) +radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages, VkPipelineBindPoint bind_point) { - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, bind_point); + struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point); struct radv_device *device = cmd_buffer->device; struct radeon_cmdbuf *cs = cmd_buffer->cs; bool flush_indirect_descriptors; @@ -4783,16 +4586,14 @@ radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags st if (flush_indirect_descriptors) radv_flush_indirect_descriptor_sets(cmd_buffer, bind_point); - ASSERTED unsigned cdw_max = - radeon_check_space(device->ws, cs, MAX_SETS * MESA_VULKAN_SHADER_STAGES * 4); + ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, MAX_SETS * MESA_VULKAN_SHADER_STAGES * 4); if (stages & VK_SHADER_STAGE_COMPUTE_BIT) { struct radv_shader *compute_shader = bind_point == VK_PIPELINE_BIND_POINT_COMPUTE ? 
cmd_buffer->state.shaders[MESA_SHADER_COMPUTE] : cmd_buffer->state.rt_prolog; - radv_emit_descriptor_pointers(device, cs, compute_shader, compute_shader->info.user_data_0, - descriptors_state); + radv_emit_descriptor_pointers(device, cs, compute_shader, compute_shader->info.user_data_0, descriptors_state); } else { radv_foreach_stage(stage, stages & ~VK_SHADER_STAGE_TASK_BIT_EXT) { @@ -4800,13 +4601,11 @@ radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags st continue; radv_emit_descriptor_pointers(device, cs, cmd_buffer->state.shaders[stage], - cmd_buffer->state.shaders[stage]->info.user_data_0, - descriptors_state); + cmd_buffer->state.shaders[stage]->info.user_data_0, descriptors_state); } if (stages & VK_SHADER_STAGE_TASK_BIT_EXT) { - radv_emit_descriptor_pointers(device, cmd_buffer->gang.cs, - cmd_buffer->state.shaders[MESA_SHADER_TASK], + radv_emit_descriptor_pointers(device, cmd_buffer->gang.cs, cmd_buffer->state.shaders[MESA_SHADER_TASK], cmd_buffer->state.shaders[MESA_SHADER_TASK]->info.user_data_0, descriptors_state); } @@ -4821,9 +4620,8 @@ radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags st } static void -radv_emit_all_inline_push_consts(struct radv_device *device, struct radeon_cmdbuf *cs, - struct radv_shader *shader, uint32_t base_reg, - uint32_t *values, bool *need_push_constants) +radv_emit_all_inline_push_consts(struct radv_device *device, struct radeon_cmdbuf *cs, struct radv_shader *shader, + uint32_t base_reg, uint32_t *values, bool *need_push_constants) { if (radv_get_user_sgpr(shader, AC_UD_PUSH_CONSTANTS)->sgpr_idx != -1) *need_push_constants |= true; @@ -4835,16 +4633,14 @@ radv_emit_all_inline_push_consts(struct radv_device *device, struct radeon_cmdbu const uint8_t base = ffs(mask) - 1; if (mask == u_bit_consecutive64(base, util_last_bit64(mask) - base)) { /* consecutive inline push constants */ - radv_emit_inline_push_consts(device, cs, shader, base_reg, AC_UD_INLINE_PUSH_CONSTANTS, - values + base); + radv_emit_inline_push_consts(device, cs, shader, base_reg, AC_UD_INLINE_PUSH_CONSTANTS, values + base); } else { /* sparse inline push constants */ uint32_t consts[AC_MAX_INLINE_PUSH_CONSTS]; unsigned num_consts = 0; u_foreach_bit64 (idx, mask) consts[num_consts++] = values[idx]; - radv_emit_inline_push_consts(device, cs, shader, base_reg, AC_UD_INLINE_PUSH_CONSTANTS, - consts); + radv_emit_inline_push_consts(device, cs, shader, base_reg, AC_UD_INLINE_PUSH_CONSTANTS, consts); } } @@ -4852,8 +4648,7 @@ ALWAYS_INLINE static VkShaderStageFlags radv_must_flush_constants(const struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages, VkPipelineBindPoint bind_point) { - const struct radv_push_constant_state *push_constants = - radv_get_push_constants_state(cmd_buffer, bind_point); + const struct radv_push_constant_state *push_constants = radv_get_push_constants_state(cmd_buffer, bind_point); if (push_constants->size || push_constants->dynamic_offset_count) return stages & cmd_buffer->push_constant_stages; @@ -4862,15 +4657,12 @@ radv_must_flush_constants(const struct radv_cmd_buffer *cmd_buffer, VkShaderStag } static void -radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages, - VkPipelineBindPoint bind_point) +radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages, VkPipelineBindPoint bind_point) { struct radv_device *device = cmd_buffer->device; struct radeon_cmdbuf *cs = cmd_buffer->cs; - struct radv_descriptor_state *descriptors_state = - 
radv_get_descriptors_state(cmd_buffer, bind_point); - const struct radv_push_constant_state *push_constants = - radv_get_push_constants_state(cmd_buffer, bind_point); + struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point); + const struct radv_push_constant_state *push_constants = radv_get_push_constants_state(cmd_buffer, bind_point); struct radv_shader *shader, *prev_shader; bool need_push_constants = false; unsigned offset; @@ -4901,30 +4693,27 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stag radv_emit_all_inline_push_consts(device, cs, compute_shader, compute_shader->info.user_data_0, (uint32_t *)cmd_buffer->push_constants, &need_push_constants); } else { - radv_foreach_stage(stage, internal_stages & ~VK_SHADER_STAGE_TASK_BIT_EXT) { + radv_foreach_stage(stage, internal_stages & ~VK_SHADER_STAGE_TASK_BIT_EXT) + { shader = radv_get_shader(cmd_buffer->state.shaders, stage); if (!shader) continue; radv_emit_all_inline_push_consts(device, cs, shader, shader->info.user_data_0, - (uint32_t *)cmd_buffer->push_constants, - &need_push_constants); + (uint32_t *)cmd_buffer->push_constants, &need_push_constants); } if (internal_stages & VK_SHADER_STAGE_TASK_BIT_EXT) { - radv_emit_all_inline_push_consts(device, cmd_buffer->gang.cs, - cmd_buffer->state.shaders[MESA_SHADER_TASK], + radv_emit_all_inline_push_consts(device, cmd_buffer->gang.cs, cmd_buffer->state.shaders[MESA_SHADER_TASK], cmd_buffer->state.shaders[MESA_SHADER_TASK]->info.user_data_0, - (uint32_t *)cmd_buffer->push_constants, - &need_push_constants); + (uint32_t *)cmd_buffer->push_constants, &need_push_constants); } } if (need_push_constants) { - if (!radv_cmd_buffer_upload_alloc( - cmd_buffer, push_constants->size + 16 * push_constants->dynamic_offset_count, &offset, - &ptr)) + if (!radv_cmd_buffer_upload_alloc(cmd_buffer, push_constants->size + 16 * push_constants->dynamic_offset_count, + &offset, &ptr)) return; memcpy(ptr, cmd_buffer->push_constants, push_constants->size); @@ -4942,8 +4731,8 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stag ? cmd_buffer->state.shaders[MESA_SHADER_COMPUTE] : cmd_buffer->state.rt_prolog; - radv_emit_userdata_address(device, cs, compute_shader, compute_shader->info.user_data_0, - AC_UD_PUSH_CONSTANTS, va); + radv_emit_userdata_address(device, cs, compute_shader, compute_shader->info.user_data_0, AC_UD_PUSH_CONSTANTS, + va); } else { prev_shader = NULL; radv_foreach_stage(stage, internal_stages & ~VK_SHADER_STAGE_TASK_BIT_EXT) @@ -4952,16 +4741,14 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stag /* Avoid redundantly emitting the address for merged stages. 
*/ if (shader && shader != prev_shader) { - radv_emit_userdata_address(device, cs, shader, shader->info.user_data_0, - AC_UD_PUSH_CONSTANTS, va); + radv_emit_userdata_address(device, cs, shader, shader->info.user_data_0, AC_UD_PUSH_CONSTANTS, va); prev_shader = shader; } } if (internal_stages & VK_SHADER_STAGE_TASK_BIT_EXT) { - radv_emit_userdata_address(device, cmd_buffer->gang.cs, - cmd_buffer->state.shaders[MESA_SHADER_TASK], + radv_emit_userdata_address(device, cmd_buffer->gang.cs, cmd_buffer->state.shaders[MESA_SHADER_TASK], cmd_buffer->state.shaders[MESA_SHADER_TASK]->info.user_data_0, AC_UD_PUSH_CONSTANTS, va); } @@ -4975,8 +4762,7 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stag } void -radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, - const struct radv_graphics_pipeline *pipeline, +radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const struct radv_graphics_pipeline *pipeline, bool full_null_descriptors, void *vb_ptr) { struct radv_shader *vs_shader = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_VERTEX); @@ -4989,8 +4775,7 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, vs_shader->info.vs.dynamic_inputs ? &cmd_buffer->state.dynamic_vs_input : NULL; assert(!vs_state || vs_shader->info.vs.use_per_attribute_vb_descs); - const struct ac_vtx_format_info *vtx_info_table = - vs_state ? ac_get_vtx_format_info_table(chip, family) : NULL; + const struct ac_vtx_format_info *vtx_info_table = vs_state ? ac_get_vtx_format_info_table(chip, family) : NULL; while (mask) { unsigned i = u_bit_scan(&mask); @@ -5006,9 +4791,8 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, continue; } - unsigned binding = - vs_state ? cmd_buffer->state.dynamic_vs_input.bindings[i] - : (vs_shader->info.vs.use_per_attribute_vb_descs ? pipeline->attrib_bindings[i] : i); + unsigned binding = vs_state ? cmd_buffer->state.dynamic_vs_input.bindings[i] + : (vs_shader->info.vs.use_per_attribute_vb_descs ? 
pipeline->attrib_bindings[i] : i); struct radv_buffer *buffer = cmd_buffer->vertex_binding_buffers[binding]; unsigned num_records; unsigned stride; @@ -5020,22 +4804,20 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, if (chip >= GFX10) { rsrc_word3 = vtx_info->dst_sel | S_008F0C_FORMAT(hw_format); } else { - rsrc_word3 = vtx_info->dst_sel | S_008F0C_NUM_FORMAT((hw_format >> 4) & 0x7) | - S_008F0C_DATA_FORMAT(hw_format & 0xf); + rsrc_word3 = + vtx_info->dst_sel | S_008F0C_NUM_FORMAT((hw_format >> 4) & 0x7) | S_008F0C_DATA_FORMAT(hw_format & 0xf); } } else { - rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); if (chip >= GFX10) rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT); else - rsrc_word3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + rsrc_word3 |= + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); } - if (pipeline->dynamic_states & (RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE | - RADV_DYNAMIC_VERTEX_INPUT)) { + if (pipeline->dynamic_states & (RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE | RADV_DYNAMIC_VERTEX_INPUT)) { stride = cmd_buffer->vertex_bindings[binding].stride; } else { stride = pipeline->binding_stride[binding]; @@ -5078,8 +4860,7 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, } if (vs_shader->info.vs.use_per_attribute_vb_descs) { - uint32_t attrib_end = - vs_state ? vs_state->offsets[i] + vs_state->format_sizes[i] : pipeline->attrib_ends[i]; + uint32_t attrib_end = vs_state ? 
vs_state->offsets[i] + vs_state->format_sizes[i] : pipeline->attrib_ends[i]; if (num_records < attrib_end) { num_records = 0; /* not enough space for one vertex */ @@ -5169,8 +4950,8 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer) va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); va += vb_offset; - radv_emit_userdata_address(cmd_buffer->device, cmd_buffer->cs, vs, vs->info.user_data_0, - AC_UD_VS_VERTEX_BUFFERS, va); + radv_emit_userdata_address(cmd_buffer->device, cmd_buffer->cs, vs, vs->info.user_data_0, AC_UD_VS_VERTEX_BUFFERS, + va); cmd_buffer->state.vb_va = va; cmd_buffer->state.vb_size = vb_desc_alloc_size; @@ -5186,8 +4967,7 @@ static void radv_emit_streamout_buffers(struct radv_cmd_buffer *cmd_buffer, uint64_t va) { const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader; - const struct radv_userdata_info *loc = - radv_get_user_sgpr(last_vgt_shader, AC_UD_STREAMOUT_BUFFERS); + const struct radv_userdata_info *loc = radv_get_user_sgpr(last_vgt_shader, AC_UD_STREAMOUT_BUFFERS); uint32_t base_reg; if (loc->sgpr_idx == -1) @@ -5195,16 +4975,14 @@ radv_emit_streamout_buffers(struct radv_cmd_buffer *cmd_buffer, uint64_t va) base_reg = last_vgt_shader->info.user_data_0; - radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va, - false); + radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va, false); if (cmd_buffer->state.gs_copy_shader) { loc = &cmd_buffer->state.gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_STREAMOUT_BUFFERS]; if (loc->sgpr_idx != -1) { base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0; - radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, - va, false); + radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va, false); } } } @@ -5250,13 +5028,12 @@ radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer) } } - uint32_t rsrc_word3 = - S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); + uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { - rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); + rsrc_word3 |= + S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); } else if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) { rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1); @@ -5283,8 +5060,7 @@ static void radv_flush_ngg_query_state(struct radv_cmd_buffer *cmd_buffer) { const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader; - const struct radv_userdata_info *loc = - radv_get_user_sgpr(last_vgt_shader, AC_UD_NGG_QUERY_STATE); + const struct radv_userdata_info *loc = radv_get_user_sgpr(last_vgt_shader, AC_UD_NGG_QUERY_STATE); enum radv_ngg_query_state ngg_query_state = radv_ngg_query_none; uint32_t base_reg; @@ -5300,8 +5076,7 @@ radv_flush_ngg_query_state(struct radv_cmd_buffer *cmd_buffer) * primitives. 
*/ if (cmd_buffer->state.active_pipeline_gds_queries || - (cmd_buffer->state.inherited_pipeline_statistics & - VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT)) + (cmd_buffer->state.inherited_pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT)) ngg_query_state |= radv_ngg_query_pipeline_stat; if (cmd_buffer->state.active_prims_gen_gds_queries) @@ -5358,8 +5133,7 @@ radv_flush_force_vrs_state(struct radv_cmd_buffer *cmd_buffer) break; } - if (cmd_buffer->state.last_vrs_rates != vrs_rates || - cmd_buffer->state.last_vrs_rates_sgpr_idx != loc->sgpr_idx) { + if (cmd_buffer->state.last_vrs_rates != vrs_rates || cmd_buffer->state.last_vrs_rates_sgpr_idx != loc->sgpr_idx) { radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, vrs_rates); } @@ -5378,8 +5152,7 @@ radv_upload_graphics_shader_descriptors(struct radv_cmd_buffer *cmd_buffer) VkShaderStageFlags stages = VK_SHADER_STAGE_ALL_GRAPHICS; radv_flush_descriptors(cmd_buffer, stages, VK_PIPELINE_BIND_POINT_GRAPHICS); - const VkShaderStageFlags pc_stages = - radv_must_flush_constants(cmd_buffer, stages, VK_PIPELINE_BIND_POINT_GRAPHICS); + const VkShaderStageFlags pc_stages = radv_must_flush_constants(cmd_buffer, stages, VK_PIPELINE_BIND_POINT_GRAPHICS); if (pc_stages) radv_flush_constants(cmd_buffer, pc_stages, VK_PIPELINE_BIND_POINT_GRAPHICS); @@ -5428,9 +5201,8 @@ struct radv_draw_info { }; static void -si_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw, - bool indirect_draw, bool count_from_stream_output, - uint32_t draw_vertex_count) +si_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw, bool indirect_draw, + bool count_from_stream_output, uint32_t draw_vertex_count) { const struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info; struct radv_cmd_state *state = &cmd_buffer->state; @@ -5441,14 +5213,13 @@ si_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_dr unsigned ia_multi_vgt_param; ia_multi_vgt_param = - si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, indirect_draw, count_from_stream_output, - draw_vertex_count, topology, prim_restart_enable, - patch_control_points, state->tess_num_patches); + si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, indirect_draw, count_from_stream_output, draw_vertex_count, + topology, prim_restart_enable, patch_control_points, state->tess_num_patches); if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) { if (info->gfx_level == GFX9) { - radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs, - R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param); + radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs, R_030960_IA_MULTI_VGT_PARAM, 4, + ia_multi_vgt_param); } else if (info->gfx_level >= GFX7) { radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param); } else { @@ -5478,15 +5249,13 @@ gfx10_emit_ge_cntl(struct radv_cmd_buffer *cmd_buffer) break_wave_at_eoi = true; } } else if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_GEOMETRY)) { - const struct radv_legacy_gs_info *gs_state = - &cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.gs_ring_info; + const struct radv_legacy_gs_info *gs_state = &cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.gs_ring_info; primgroup_size = G_028A44_GS_PRIMS_PER_SUBGRP(gs_state->vgt_gs_onchip_cntl); } else { primgroup_size = 128; /* recommended without a GS and tess */ } - ge_cntl = S_03096C_PRIM_GRP_SIZE_GFX10(primgroup_size) | - 
S_03096C_VERT_GRP_SIZE(256) | /* disable vertex grouping */ + ge_cntl = S_03096C_PRIM_GRP_SIZE_GFX10(primgroup_size) | S_03096C_VERT_GRP_SIZE(256) | /* disable vertex grouping */ S_03096C_PACKET_TO_ONE_PA(0) /* this should only be set if LINE_STIPPLE_TEX_ENA == 1 */ | S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi); @@ -5510,8 +5279,7 @@ radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_d gfx10_emit_ge_cntl(cmd_buffer); } else { si_emit_ia_multi_vgt_param(cmd_buffer, draw_info->instance_count > 1, draw_info->indirect, - !!draw_info->strmout_buffer, - draw_info->indirect ? 0 : draw_info->count); + !!draw_info->strmout_buffer, draw_info->indirect ? 0 : draw_info->count); } /* RDNA2 is affected by a hardware bug when instance packing is enabled for adjacent primitive @@ -5525,16 +5293,14 @@ radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_d disable_instance_packing = true; } - if ((draw_info->indexed && - (state->index_type != state->last_index_type || cmd_buffer->device->uses_shadow_regs)) || + if ((draw_info->indexed && (state->index_type != state->last_index_type || cmd_buffer->device->uses_shadow_regs)) || (info->gfx_level == GFX10_3 && (state->last_index_type == -1 || disable_instance_packing != G_028A7C_DISABLE_INSTANCE_PACKING(state->last_index_type)))) { uint32_t index_type = state->index_type | S_028A7C_DISABLE_INSTANCE_PACKING(disable_instance_packing); if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) { - radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs, - R_03090C_VGT_INDEX_TYPE, 2, index_type); + radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs, R_03090C_VGT_INDEX_TYPE, 2, index_type); } else { radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0)); radeon_emit(cs, index_type); @@ -5553,9 +5319,7 @@ radv_stage_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_s if (src_stage_mask & VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT) src_stage_mask |= VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT; - if (src_stage_mask & (VK_PIPELINE_STAGE_2_COPY_BIT | - VK_PIPELINE_STAGE_2_RESOLVE_BIT | - VK_PIPELINE_STAGE_2_BLIT_BIT | + if (src_stage_mask & (VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_RESOLVE_BIT | VK_PIPELINE_STAGE_2_BLIT_BIT | VK_PIPELINE_STAGE_2_CLEAR_BIT)) { /* Be conservative for now. 
*/ src_stage_mask |= VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT; @@ -5564,26 +5328,21 @@ radv_stage_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_s if (src_stage_mask & (VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | - VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR | - VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) { + VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR | VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR | + VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) { cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH; } - if (src_stage_mask & - (VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT | - VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | - VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) { + if (src_stage_mask & (VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT | + VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | + VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) { cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH; } else if (src_stage_mask & (VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT | - VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT | - VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT | - VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT | - VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT | - VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT | + VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT | + VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT | VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT | VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT)) { cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH; } @@ -5627,8 +5386,7 @@ can_skip_buffer_l2_flushes(struct radv_device *device) */ enum radv_cmd_flush_bits -radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 src_flags, - const struct radv_image *image) +radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 src_flags, const struct radv_image *image) { bool has_CB_meta = true, has_DB_meta = true; bool image_is_coherent = image ? 
image->l2_coherent : false; @@ -5641,8 +5399,7 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 src_fla has_DB_meta = false; } - u_foreach_bit64(b, src_flags) - { + u_foreach_bit64 (b, src_flags) { switch ((VkAccessFlags2)BITFIELD64_BIT(b)) { case VK_ACCESS_2_SHADER_WRITE_BIT: case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT: @@ -5704,8 +5461,7 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 src_fla } enum radv_cmd_flush_bits -radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 dst_flags, - const struct radv_image *image) +radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 dst_flags, const struct radv_image *image) { bool has_CB_meta = true, has_DB_meta = true; enum radv_cmd_flush_bits flush_bits = 0; @@ -5726,11 +5482,9 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 dst_fla /* All the L2 invalidations below are not the CB/DB. So if there are no incoherent images * in the L2 cache in CB/DB mode then they are already usable from all the other L2 clients. */ - image_is_coherent |= - can_skip_buffer_l2_flushes(cmd_buffer->device) && !cmd_buffer->state.rb_noncoherent_dirty; + image_is_coherent |= can_skip_buffer_l2_flushes(cmd_buffer->device) && !cmd_buffer->state.rb_noncoherent_dirty; - u_foreach_bit64(b, dst_flags) - { + u_foreach_bit64 (b, dst_flags) { switch ((VkAccessFlags2)BITFIELD64_BIT(b)) { case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT: /* SMEM loads are used to read compute dispatch size in shaders */ @@ -5826,8 +5580,7 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 dst_fla } void -radv_emit_resolve_barrier(struct radv_cmd_buffer *cmd_buffer, - const struct radv_resolve_barrier *barrier) +radv_emit_resolve_barrier(struct radv_cmd_buffer *cmd_buffer, const struct radv_resolve_barrier *barrier) { struct radv_rendering_state *render = &cmd_buffer->state.render; @@ -5836,8 +5589,7 @@ radv_emit_resolve_barrier(struct radv_cmd_buffer *cmd_buffer, if (!iview) continue; - cmd_buffer->state.flush_bits |= - radv_src_access_flush(cmd_buffer, barrier->src_access_mask, iview->image); + cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, barrier->src_access_mask, iview->image); } if (render->ds_att.iview) { cmd_buffer->state.flush_bits |= @@ -5851,8 +5603,7 @@ radv_emit_resolve_barrier(struct radv_cmd_buffer *cmd_buffer, if (!iview) continue; - cmd_buffer->state.flush_bits |= - radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask, iview->image); + cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask, iview->image); } if (render->ds_att.iview) { cmd_buffer->state.flush_bits |= @@ -5865,8 +5616,7 @@ radv_emit_resolve_barrier(struct radv_cmd_buffer *cmd_buffer, static void radv_handle_image_transition_separate(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout src_layout, VkImageLayout dst_layout, - VkImageLayout src_stencil_layout, - VkImageLayout dst_stencil_layout, + VkImageLayout src_stencil_layout, VkImageLayout dst_stencil_layout, uint32_t src_family_index, uint32_t dst_family_index, const VkImageSubresourceRange *range, struct radv_sample_locations_state *sample_locs) @@ -5880,28 +5630,24 @@ radv_handle_image_transition_separate(struct radv_cmd_buffer *cmd_buffer, struct /* Depth-only transitions. 
*/ if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { aspect_range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; - radv_handle_image_transition(cmd_buffer, image, src_layout, dst_layout, - src_family_index, dst_family_index, &aspect_range, sample_locs); + radv_handle_image_transition(cmd_buffer, image, src_layout, dst_layout, src_family_index, dst_family_index, + &aspect_range, sample_locs); } /* Stencil-only transitions. */ aspect_range.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; - radv_handle_image_transition(cmd_buffer, image, src_stencil_layout, dst_stencil_layout, - src_family_index, dst_family_index, &aspect_range, sample_locs); + radv_handle_image_transition(cmd_buffer, image, src_stencil_layout, dst_stencil_layout, src_family_index, + dst_family_index, &aspect_range, sample_locs); } else { - radv_handle_image_transition(cmd_buffer, image, src_layout, dst_layout, - src_family_index, dst_family_index, range, sample_locs); + radv_handle_image_transition(cmd_buffer, image, src_layout, dst_layout, src_family_index, dst_family_index, range, + sample_locs); } } static void -radv_handle_rendering_image_transition(struct radv_cmd_buffer *cmd_buffer, - struct radv_image_view *view, - uint32_t layer_count, - uint32_t view_mask, - VkImageLayout initial_layout, - VkImageLayout initial_stencil_layout, - VkImageLayout final_layout, +radv_handle_rendering_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *view, + uint32_t layer_count, uint32_t view_mask, VkImageLayout initial_layout, + VkImageLayout initial_stencil_layout, VkImageLayout final_layout, VkImageLayout final_stencil_layout, struct radv_sample_locations_state *sample_locs) { @@ -5919,15 +5665,13 @@ radv_handle_rendering_image_transition(struct radv_cmd_buffer *cmd_buffer, range.layerCount = count; radv_handle_image_transition_separate(cmd_buffer, view->image, initial_layout, final_layout, - initial_stencil_layout, final_stencil_layout, - 0, 0, &range, sample_locs); + initial_stencil_layout, final_stencil_layout, 0, 0, &range, sample_locs); } } else { range.baseArrayLayer = view->vk.base_array_layer; range.layerCount = layer_count; radv_handle_image_transition_separate(cmd_buffer, view->image, initial_layout, final_layout, - initial_stencil_layout, final_stencil_layout, - 0, 0, &range, sample_locs); + initial_stencil_layout, final_stencil_layout, 0, 0, &range, sample_locs); } } @@ -5957,8 +5701,7 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi cmd_buffer->state.last_db_count_control = -1; cmd_buffer->usage_flags = pBeginInfo->flags; - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_ALL | RADV_CMD_DIRTY_GUARDBAND | - RADV_CMD_DIRTY_OCCLUSION_QUERY; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_ALL | RADV_CMD_DIRTY_GUARDBAND | RADV_CMD_DIRTY_OCCLUSION_QUERY; if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) { uint32_t pred_value = 0; @@ -5970,8 +5713,7 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi cmd_buffer->mec_inv_pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset; } - if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9 && - cmd_buffer->qf == RADV_QUEUE_GENERAL) { + if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9 && cmd_buffer->qf == RADV_QUEUE_GENERAL) { unsigned num_db = cmd_buffer->device->physical_device->rad_info.max_render_backends; unsigned fence_offset, eop_bug_offset; void *fence_ptr; @@ -6000,8 +5742,7 @@ radv_BeginCommandBuffer(VkCommandBuffer 
commandBuffer, const VkCommandBufferBegi char gcbiar_data[VK_GCBIARR_DATA_SIZE(MAX_RTS)]; const VkRenderingInfo *resume_info = - vk_get_command_buffer_inheritance_as_rendering_resume(cmd_buffer->vk.level, pBeginInfo, - gcbiar_data); + vk_get_command_buffer_inheritance_as_rendering_resume(cmd_buffer->vk.level, pBeginInfo, gcbiar_data); if (resume_info) { radv_CmdBeginRendering(commandBuffer, resume_info); } else { @@ -6015,30 +5756,26 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi render->max_samples = inheritance_info->rasterizationSamples; render->color_att_count = inheritance_info->colorAttachmentCount; for (uint32_t i = 0; i < render->color_att_count; i++) { - render->color_att[i] = (struct radv_attachment) { + render->color_att[i] = (struct radv_attachment){ .format = inheritance_info->pColorAttachmentFormats[i], }; } assert(inheritance_info->depthAttachmentFormat == VK_FORMAT_UNDEFINED || inheritance_info->stencilAttachmentFormat == VK_FORMAT_UNDEFINED || - inheritance_info->depthAttachmentFormat == - inheritance_info->stencilAttachmentFormat); - render->ds_att = (struct radv_attachment) { .iview = NULL }; + inheritance_info->depthAttachmentFormat == inheritance_info->stencilAttachmentFormat); + render->ds_att = (struct radv_attachment){.iview = NULL}; if (inheritance_info->depthAttachmentFormat != VK_FORMAT_UNDEFINED) render->ds_att.format = inheritance_info->depthAttachmentFormat; if (inheritance_info->stencilAttachmentFormat != VK_FORMAT_UNDEFINED) render->ds_att.format = inheritance_info->stencilAttachmentFormat; } - cmd_buffer->state.inherited_pipeline_statistics = - pBeginInfo->pInheritanceInfo->pipelineStatistics; + cmd_buffer->state.inherited_pipeline_statistics = pBeginInfo->pInheritanceInfo->pipelineStatistics; - if (cmd_buffer->state.inherited_pipeline_statistics & - VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT) + if (cmd_buffer->state.inherited_pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY; - cmd_buffer->state.inherited_occlusion_queries = - pBeginInfo->pInheritanceInfo->occlusionQueryEnable; + cmd_buffer->state.inherited_occlusion_queries = pBeginInfo->pInheritanceInfo->occlusionQueryEnable; cmd_buffer->state.inherited_query_control_flags = pBeginInfo->pInheritanceInfo->queryFlags; if (cmd_buffer->state.inherited_occlusion_queries) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_OCCLUSION_QUERY; @@ -6053,9 +5790,8 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi } VKAPI_ATTR void VKAPI_CALL -radv_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, - uint32_t bindingCount, const VkBuffer *pBuffers, - const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes, +radv_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount, + const VkBuffer *pBuffers, const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes, const VkDeviceSize *pStrides) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); @@ -6081,8 +5817,7 @@ radv_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, VkDeviceSize stride = pStrides ? 
pStrides[i] : vb[idx].stride; if (!!cmd_buffer->vertex_binding_buffers[idx] != !!buffer || - (buffer && ((vb[idx].offset & 0x3) != (pOffsets[i] & 0x3) || - (vb[idx].stride & 0x3) != (stride & 0x3)))) { + (buffer && ((vb[idx].offset & 0x3) != (pOffsets[i] & 0x3) || (vb[idx].stride & 0x3) != (stride & 0x3)))) { misaligned_mask_invalid |= state->bindings_match_attrib ? BITFIELD_BIT(idx) : 0xffffffff; } @@ -6105,8 +5840,7 @@ radv_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, cmd_buffer->state.vbo_misaligned_mask &= ~misaligned_mask_invalid; } - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER | - RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER | RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT; } static uint32_t @@ -6141,8 +5875,7 @@ radv_get_vgt_index_size(uint32_t type) } VKAPI_ATTR void VKAPI_CALL -radv_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, - VkIndexType indexType) +radv_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_buffer, index_buffer, buffer); @@ -6152,8 +5885,7 @@ radv_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDevice cmd_buffer->state.index_va += index_buffer->offset + offset; int index_size = radv_get_vgt_index_size(vk_to_index_type(indexType)); - cmd_buffer->state.max_index_count = - (vk_buffer_range(&index_buffer->vk, offset, VK_WHOLE_SIZE)) / index_size; + cmd_buffer->state.max_index_count = (vk_buffer_range(&index_buffer->vk, offset, VK_WHOLE_SIZE)) / index_size; radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, index_buffer->bo); cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER; @@ -6194,10 +5926,8 @@ radv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pi RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout); unsigned dyn_idx = 0; - const bool no_dynamic_bounds = - cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_DYNAMIC_BOUNDS; - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, pipelineBindPoint); + const bool no_dynamic_bounds = cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_DYNAMIC_BOUNDS; + struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, pipelineBindPoint); for (unsigned i = 0; i < descriptorSetCount; ++i) { unsigned set_idx = i + firstSet; @@ -6208,8 +5938,7 @@ radv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pi /* If the set is already bound we only need to update the * (potentially changed) dynamic offsets. 
*/ - if (descriptors_state->sets[set_idx] != set || - !(descriptors_state->valid & (1u << set_idx))) { + if (descriptors_state->sets[set_idx] != set || !(descriptors_state->valid & (1u << set_idx))) { radv_bind_descriptor_set(cmd_buffer, pipelineBindPoint, set, set_idx); } @@ -6231,8 +5960,7 @@ radv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pi S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { - dst[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); + dst[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); } else if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) { dst[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1); @@ -6249,11 +5977,9 @@ radv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pi static bool radv_init_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, struct radv_descriptor_set *set, - struct radv_descriptor_set_layout *layout, - VkPipelineBindPoint bind_point) + struct radv_descriptor_set_layout *layout, VkPipelineBindPoint bind_point) { - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, bind_point); + struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point); set->header.size = layout->size; if (set->header.layout != layout) { @@ -6284,14 +6010,12 @@ radv_init_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, struct radv_de } void -radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, - VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout, - uint32_t set, uint32_t descriptorWriteCount, +radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout _layout, uint32_t set, uint32_t descriptorWriteCount, const VkWriteDescriptorSet *pDescriptorWrites) { RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout); - struct radv_descriptor_set *push_set = - (struct radv_descriptor_set *)&cmd_buffer->meta_push_descriptors; + struct radv_descriptor_set *push_set = (struct radv_descriptor_set *)&cmd_buffer->meta_push_descriptors; unsigned bo_offset; assert(set == 0); @@ -6307,9 +6031,8 @@ radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, push_set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); push_set->header.va += bo_offset; - radv_cmd_update_descriptor_sets(cmd_buffer->device, cmd_buffer, - radv_descriptor_set_to_handle(push_set), descriptorWriteCount, - pDescriptorWrites, 0, NULL); + radv_cmd_update_descriptor_sets(cmd_buffer->device, cmd_buffer, radv_descriptor_set_to_handle(push_set), + descriptorWriteCount, pDescriptorWrites, 0, NULL); radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set); } @@ -6321,15 +6044,12 @@ radv_CmdPushDescriptorSetKHR(VkCommandBuffer commandBuffer, VkPipelineBindPoint { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout); - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, pipelineBindPoint); - struct radv_descriptor_set *push_set = - (struct radv_descriptor_set *)&descriptors_state->push_set.set; + struct radv_descriptor_state *descriptors_state = 
radv_get_descriptors_state(cmd_buffer, pipelineBindPoint); + struct radv_descriptor_set *push_set = (struct radv_descriptor_set *)&descriptors_state->push_set.set; assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR); - if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout, - pipelineBindPoint)) + if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout, pipelineBindPoint)) return; /* Check that there are no inline uniform block updates when calling vkCmdPushDescriptorSetKHR() @@ -6340,9 +6060,8 @@ radv_CmdPushDescriptorSetKHR(VkCommandBuffer commandBuffer, VkPipelineBindPoint assert(writeset->descriptorType != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK); } - radv_cmd_update_descriptor_sets(cmd_buffer->device, cmd_buffer, - radv_descriptor_set_to_handle(push_set), descriptorWriteCount, - pDescriptorWrites, 0, NULL); + radv_cmd_update_descriptor_sets(cmd_buffer->device, cmd_buffer, radv_descriptor_set_to_handle(push_set), + descriptorWriteCount, pDescriptorWrites, 0, NULL); radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set); @@ -6351,25 +6070,22 @@ radv_CmdPushDescriptorSetKHR(VkCommandBuffer commandBuffer, VkPipelineBindPoint VKAPI_ATTR void VKAPI_CALL radv_CmdPushDescriptorSetWithTemplateKHR(VkCommandBuffer commandBuffer, - VkDescriptorUpdateTemplate descriptorUpdateTemplate, - VkPipelineLayout _layout, uint32_t set, const void *pData) + VkDescriptorUpdateTemplate descriptorUpdateTemplate, VkPipelineLayout _layout, + uint32_t set, const void *pData) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout); RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate); - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, templ->bind_point); - struct radv_descriptor_set *push_set = - (struct radv_descriptor_set *)&descriptors_state->push_set.set; + struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, templ->bind_point); + struct radv_descriptor_set *push_set = (struct radv_descriptor_set *)&descriptors_state->push_set.set; assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR); - if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout, - templ->bind_point)) + if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout, templ->bind_point)) return; - radv_cmd_update_descriptor_set_with_template(cmd_buffer->device, cmd_buffer, push_set, - descriptorUpdateTemplate, pData); + radv_cmd_update_descriptor_set_with_template(cmd_buffer->device, cmd_buffer, push_set, descriptorUpdateTemplate, + pData); radv_set_descriptor_set(cmd_buffer, templ->bind_point, push_set, set); @@ -6377,9 +6093,8 @@ radv_CmdPushDescriptorSetWithTemplateKHR(VkCommandBuffer commandBuffer, } VKAPI_ATTR void VKAPI_CALL -radv_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, - VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size, - const void *pValues) +radv_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, + uint32_t offset, uint32_t size, const void *pValues) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); memcpy(cmd_buffer->push_constants + offset, pValues, size); @@ -6393,8 +6108,7 @@ radv_EndCommandBuffer(VkCommandBuffer commandBuffer) 
radv_emit_mip_change_flush_default(cmd_buffer); - if (cmd_buffer->qf == RADV_QUEUE_GENERAL || - cmd_buffer->qf == RADV_QUEUE_COMPUTE) { + if (cmd_buffer->qf == RADV_QUEUE_GENERAL || cmd_buffer->qf == RADV_QUEUE_COMPUTE) { if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX6) cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2; @@ -6409,10 +6123,7 @@ radv_EndCommandBuffer(VkCommandBuffer commandBuffer) */ if (cmd_buffer->state.rb_noncoherent_dirty && !can_skip_buffer_l2_flushes(cmd_buffer->device)) cmd_buffer->state.flush_bits |= radv_src_access_flush( - cmd_buffer, - VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, - NULL); + cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL); /* Since NGG streamout uses GDS, we need to make GDS idle when * we leave the IB, otherwise another process might overwrite @@ -6447,8 +6158,7 @@ radv_EndCommandBuffer(VkCommandBuffer commandBuffer) } static void -radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer, - struct radv_compute_pipeline *pipeline) +radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_compute_pipeline *pipeline) { if (pipeline == cmd_buffer->state.emitted_compute_pipeline) return; @@ -6462,12 +6172,10 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer, cmd_buffer->compute_scratch_size_per_wave_needed = MAX2(cmd_buffer->compute_scratch_size_per_wave_needed, pipeline->base.scratch_bytes_per_wave); - cmd_buffer->compute_scratch_waves_wanted = - MAX2(cmd_buffer->compute_scratch_waves_wanted, pipeline->base.max_waves); + cmd_buffer->compute_scratch_waves_wanted = MAX2(cmd_buffer->compute_scratch_waves_wanted, pipeline->base.max_waves); if (pipeline->base.type == RADV_PIPELINE_COMPUTE) { - radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, - cmd_buffer->state.shaders[MESA_SHADER_COMPUTE]->bo); + radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->state.shaders[MESA_SHADER_COMPUTE]->bo); } else { radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->state.rt_prolog->bo); radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, @@ -6477,8 +6185,7 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer, if (!radv_ray_tracing_stage_is_compiled(&rt_pipeline->stages[i])) continue; - struct radv_shader *shader = container_of(rt_pipeline->stages[i].shader, - struct radv_shader, base); + struct radv_shader *shader = container_of(rt_pipeline->stages[i].shader, struct radv_shader, base); radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, shader->bo); } } @@ -6490,15 +6197,13 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer, static void radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point) { - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, bind_point); + struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point); descriptors_state->dirty |= descriptors_state->valid; } static void -radv_bind_vs_input_state(struct radv_cmd_buffer *cmd_buffer, - const struct radv_graphics_pipeline *pipeline) +radv_bind_vs_input_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_graphics_pipeline *pipeline) { const struct radv_shader *vs_shader = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_VERTEX); const struct radv_vs_input_state 
*src = &pipeline->vs_input_state; @@ -6508,8 +6213,7 @@ radv_bind_vs_input_state(struct radv_cmd_buffer *cmd_buffer, * two different libraries. Otherwise, if the VS has a prolog, the state is dynamic and there is * nothing to bind. */ - if (!vs_shader || !vs_shader->info.vs.has_prolog || - (pipeline->dynamic_states & RADV_DYNAMIC_VERTEX_INPUT)) + if (!vs_shader || !vs_shader->info.vs.has_prolog || (pipeline->dynamic_states & RADV_DYNAMIC_VERTEX_INPUT)) return; cmd_buffer->state.dynamic_vs_input = *src; @@ -6524,8 +6228,7 @@ radv_bind_vs_input_state(struct radv_cmd_buffer *cmd_buffer, } static void -radv_bind_multisample_state(struct radv_cmd_buffer *cmd_buffer, - const struct radv_multisample_state *ms) +radv_bind_multisample_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_multisample_state *ms) { if (ms->sample_shading_enable) { cmd_buffer->state.ms.sample_shading_enable = true; @@ -6539,10 +6242,8 @@ radv_bind_pre_rast_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_ bool mesh_shading = shader->info.stage == MESA_SHADER_MESH; const struct radv_userdata_info *loc; - assert(shader->info.stage == MESA_SHADER_VERTEX || - shader->info.stage == MESA_SHADER_TESS_CTRL || - shader->info.stage == MESA_SHADER_TESS_EVAL || - shader->info.stage == MESA_SHADER_GEOMETRY || + assert(shader->info.stage == MESA_SHADER_VERTEX || shader->info.stage == MESA_SHADER_TESS_CTRL || + shader->info.stage == MESA_SHADER_TESS_EVAL || shader->info.stage == MESA_SHADER_GEOMETRY || shader->info.stage == MESA_SHADER_MESH); if (radv_get_user_sgpr(shader, AC_UD_NGG_PROVOKING_VTX)->sgpr_idx != -1) { @@ -6589,8 +6290,8 @@ radv_bind_pre_rast_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_ /* Re-emit VRS state because the combiner is different (vertex vs primitive). Re-emit * primitive topology because the mesh shading pipeline clobbered it. */ - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE | - RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY; + cmd_buffer->state.dirty |= + RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE | RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY; } cmd_buffer->state.mesh_shading = mesh_shading; @@ -6614,8 +6315,7 @@ radv_bind_tess_ctrl_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv /* Always re-emit patch control points/domain origin when a new pipeline with tessellation is * bound because a bunch of parameters (user SGPRs, TCS vertices out, ccw, etc) can be different. */ - cmd_buffer->state.dirty |= - RADV_CMD_DIRTY_DYNAMIC_PATCH_CONTROL_POINTS | RADV_CMD_DIRTY_DYNAMIC_TESS_DOMAIN_ORIGIN; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_PATCH_CONTROL_POINTS | RADV_CMD_DIRTY_DYNAMIC_TESS_DOMAIN_ORIGIN; } static void @@ -6631,10 +6331,8 @@ radv_bind_geometry_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_ { radv_bind_pre_rast_shader(cmd_buffer, gs); - cmd_buffer->esgs_ring_size_needed = - MAX2(cmd_buffer->esgs_ring_size_needed, gs->info.gs_ring_info.esgs_ring_size); - cmd_buffer->gsvs_ring_size_needed = - MAX2(cmd_buffer->gsvs_ring_size_needed, gs->info.gs_ring_info.gsvs_ring_size); + cmd_buffer->esgs_ring_size_needed = MAX2(cmd_buffer->esgs_ring_size_needed, gs->info.gs_ring_info.esgs_ring_size); + cmd_buffer->gsvs_ring_size_needed = MAX2(cmd_buffer->gsvs_ring_size_needed, gs->info.gs_ring_info.gsvs_ring_size); } static void @@ -6658,8 +6356,8 @@ radv_bind_fragment_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_ /* Re-emit the FS state because the SGPR idx can be different. 
*/ if (radv_get_user_sgpr(ps, AC_UD_PS_STATE)->sgpr_idx != -1) { - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES | - RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE; + cmd_buffer->state.dirty |= + RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES | RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE; } /* Re-emit the conservative rasterization mode because inner coverage is different. */ @@ -6668,8 +6366,8 @@ radv_bind_fragment_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_ if (gfx_level >= GFX10_3 && (!previous_ps || previous_ps->info.ps.reads_sample_mask_in != ps->info.ps.reads_sample_mask_in)) - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES | - RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE; + cmd_buffer->state.dirty |= + RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES | RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE; if (!previous_ps || radv_ps_can_enable_early_z(previous_ps) != radv_ps_can_enable_early_z(ps)) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE; @@ -6706,8 +6404,7 @@ radv_bind_task_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_shad /* This function binds/unbinds a shader to the cmdbuffer state. */ static void -radv_bind_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *shader, - gl_shader_stage stage) +radv_bind_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *shader, gl_shader_stage stage) { if (!shader) { cmd_buffer->state.shaders[stage] = NULL; @@ -6716,10 +6413,9 @@ radv_bind_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *shader, /* Reset some dynamic states when a shader stage is unbound. */ switch (stage) { case MESA_SHADER_FRAGMENT: - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_CONSERVATIVE_RAST_MODE | - RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES | - RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE | - RADV_CMD_DIRTY_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE; + cmd_buffer->state.dirty |= + RADV_CMD_DIRTY_DYNAMIC_CONSERVATIVE_RAST_MODE | RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES | + RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE | RADV_CMD_DIRTY_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE; break; default: break; @@ -6761,12 +6457,11 @@ radv_bind_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *shader, cmd_buffer->state.active_stages |= mesa_to_vk_shader_stage(stage); } -#define RADV_GRAPHICS_STAGES \ +#define RADV_GRAPHICS_STAGES \ (VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT) VKAPI_ATTR void VKAPI_CALL -radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, - VkPipeline _pipeline) +radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline _pipeline) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); @@ -6779,8 +6474,7 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline return; radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint); - radv_bind_shader(cmd_buffer, compute_pipeline->base.shaders[MESA_SHADER_COMPUTE], - MESA_SHADER_COMPUTE); + radv_bind_shader(cmd_buffer, compute_pipeline->base.shaders[MESA_SHADER_COMPUTE], MESA_SHADER_COMPUTE); cmd_buffer->state.compute_pipeline = compute_pipeline; cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT; @@ -6793,8 +6487,7 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline return; 
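The dirty-flag handling in these hunks follows a deferred-emission pattern: binding a shader only accumulates RADV_CMD_DIRTY_* bits, and the affected packets are regenerated at the next draw. A minimal standalone sketch of that pattern, with hypothetical flag and function names rather than the radv ones:

```c
/* Illustrative only: deferred re-emission via dirty bits. Names are
 * hypothetical, not the radv API. */
#include <stdint.h>
#include <stdio.h>

#define DIRTY_RASTERIZATION_SAMPLES (1u << 0)
#define DIRTY_FRAGMENT_SHADING_RATE (1u << 1)

struct cmd_state {
   uint32_t dirty; /* accumulated state that must be re-emitted */
};

/* Binding a shader emits nothing; it only records what became stale. */
static void bind_fragment_shader(struct cmd_state *s)
{
   s->dirty |= DIRTY_RASTERIZATION_SAMPLES | DIRTY_FRAGMENT_SHADING_RATE;
}

/* The next draw flushes whatever is flagged and clears the bits. */
static void emit_dirty_state(struct cmd_state *s)
{
   if (s->dirty & DIRTY_RASTERIZATION_SAMPLES)
      printf("emit rasterization samples\n");
   if (s->dirty & DIRTY_FRAGMENT_SHADING_RATE)
      printf("emit fragment shading rate\n");
   s->dirty = 0;
}

int main(void)
{
   struct cmd_state s = {0};
   bind_fragment_shader(&s);
   emit_dirty_state(&s);
   return 0;
}
```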
radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint); - radv_bind_shader(cmd_buffer, rt_pipeline->base.base.shaders[MESA_SHADER_INTERSECTION], - MESA_SHADER_INTERSECTION); + radv_bind_shader(cmd_buffer, rt_pipeline->base.base.shaders[MESA_SHADER_INTERSECTION], MESA_SHADER_INTERSECTION); cmd_buffer->state.rt_prolog = rt_pipeline->base.base.shaders[MESA_SHADER_COMPUTE]; cmd_buffer->state.rt_pipeline = rt_pipeline; @@ -6812,13 +6505,14 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline return; radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint); - radv_foreach_stage(stage, (cmd_buffer->state.active_stages | graphics_pipeline->active_stages) & RADV_GRAPHICS_STAGES) { + radv_foreach_stage(stage, + (cmd_buffer->state.active_stages | graphics_pipeline->active_stages) & RADV_GRAPHICS_STAGES) + { radv_bind_shader(cmd_buffer, graphics_pipeline->base.shaders[stage], stage); } cmd_buffer->state.gs_copy_shader = graphics_pipeline->base.gs_copy_shader; - cmd_buffer->state.last_vgt_shader = - graphics_pipeline->base.shaders[graphics_pipeline->last_vgt_api_stage]; + cmd_buffer->state.last_vgt_shader = graphics_pipeline->base.shaders[graphics_pipeline->last_vgt_api_stage]; cmd_buffer->state.graphics_pipeline = graphics_pipeline; @@ -6830,8 +6524,7 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_SHADERS; if (cmd_buffer->device->physical_device->rad_info.has_vgt_flush_ngg_legacy_bug && - cmd_buffer->state.emitted_graphics_pipeline && - cmd_buffer->state.emitted_graphics_pipeline->is_ngg && + cmd_buffer->state.emitted_graphics_pipeline && cmd_buffer->state.emitted_graphics_pipeline->is_ngg && !cmd_buffer->state.graphics_pipeline->is_ngg) { /* Transitioning from NGG to legacy GS requires * VGT_FLUSH on GFX10 and Navi21. VGT_FLUSH @@ -6851,8 +6544,7 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline } } - const struct radv_shader *vs = - radv_get_shader(graphics_pipeline->base.shaders, MESA_SHADER_VERTEX); + const struct radv_shader *vs = radv_get_shader(graphics_pipeline->base.shaders, MESA_SHADER_VERTEX); if (vs) { /* Re-emit the VS prolog when a new vertex shader is bound. 
*/ if (vs->info.vs.has_prolog) { @@ -6899,16 +6591,14 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline break; } - cmd_buffer->push_constant_state[vk_to_bind_point(pipelineBindPoint)].size = - pipeline->push_constant_size; + cmd_buffer->push_constant_state[vk_to_bind_point(pipelineBindPoint)].size = pipeline->push_constant_size; cmd_buffer->push_constant_state[vk_to_bind_point(pipelineBindPoint)].dynamic_offset_count = pipeline->dynamic_offset_count; cmd_buffer->descriptors[vk_to_bind_point(pipelineBindPoint)].need_indirect_descriptor_sets = pipeline->need_indirect_descriptor_sets; if (cmd_buffer->device->shader_use_invisible_vram) - cmd_buffer->shader_upload_seq = - MAX2(cmd_buffer->shader_upload_seq, pipeline->shader_upload_seq); + cmd_buffer->shader_upload_seq = MAX2(cmd_buffer->shader_upload_seq, pipeline->shader_upload_seq); } VKAPI_ATTR void VKAPI_CALL @@ -6925,11 +6615,9 @@ radv_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint3 if (state->dynamic.vk.vp.viewport_count < total_count) state->dynamic.vk.vp.viewport_count = total_count; - memcpy(state->dynamic.vk.vp.viewports + firstViewport, pViewports, - viewportCount * sizeof(*pViewports)); + memcpy(state->dynamic.vk.vp.viewports + firstViewport, pViewports, viewportCount * sizeof(*pViewports)); for (unsigned i = 0; i < viewportCount; i++) { - radv_get_viewport_xform(&pViewports[i], - state->dynamic.hw_vp.xform[i + firstViewport].scale, + radv_get_viewport_xform(&pViewports[i], state->dynamic.hw_vp.xform[i + firstViewport].scale, state->dynamic.hw_vp.xform[i + firstViewport].translate); } @@ -6950,8 +6638,7 @@ radv_CmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_ if (state->dynamic.vk.vp.scissor_count < total_count) state->dynamic.vk.vp.scissor_count = total_count; - memcpy(state->dynamic.vk.vp.scissors + firstScissor, pScissors, - scissorCount * sizeof(*pScissors)); + memcpy(state->dynamic.vk.vp.scissors + firstScissor, pScissors, scissorCount * sizeof(*pScissors)); state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR; } @@ -6968,8 +6655,8 @@ radv_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth) } VKAPI_ATTR void VKAPI_CALL -radv_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, - float depthBiasClamp, float depthBiasSlopeFactor) +radv_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, float depthBiasClamp, + float depthBiasSlopeFactor) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_cmd_state *state = &cmd_buffer->state; @@ -7005,8 +6692,7 @@ radv_CmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds, floa } VKAPI_ATTR void VKAPI_CALL -radv_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, - uint32_t compareMask) +radv_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t compareMask) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_cmd_state *state = &cmd_buffer->state; @@ -7020,8 +6706,7 @@ radv_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags } VKAPI_ATTR void VKAPI_CALL -radv_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, - uint32_t writeMask) +radv_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t writeMask) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_cmd_state *state = &cmd_buffer->state; @@ -7035,8 
+6720,7 @@ radv_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags fa } VKAPI_ATTR void VKAPI_CALL -radv_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, - uint32_t reference) +radv_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t reference) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_cmd_state *state = &cmd_buffer->state; @@ -7060,15 +6744,13 @@ radv_CmdSetDiscardRectangleEXT(VkCommandBuffer commandBuffer, uint32_t firstDisc assert(firstDiscardRectangle < MAX_DISCARD_RECTANGLES); assert(total_count >= 1 && total_count <= MAX_DISCARD_RECTANGLES); - typed_memcpy(&state->dynamic.vk.dr.rectangles[firstDiscardRectangle], pDiscardRectangles, - discardRectangleCount); + typed_memcpy(&state->dynamic.vk.dr.rectangles[firstDiscardRectangle], pDiscardRectangles, discardRectangleCount); state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE; } VKAPI_ATTR void VKAPI_CALL -radv_CmdSetSampleLocationsEXT(VkCommandBuffer commandBuffer, - const VkSampleLocationsInfoEXT *pSampleLocationsInfo) +radv_CmdSetSampleLocationsEXT(VkCommandBuffer commandBuffer, const VkSampleLocationsInfoEXT *pSampleLocationsInfo) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_cmd_state *state = &cmd_buffer->state; @@ -7078,15 +6760,14 @@ radv_CmdSetSampleLocationsEXT(VkCommandBuffer commandBuffer, state->dynamic.sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel; state->dynamic.sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize; state->dynamic.sample_location.count = pSampleLocationsInfo->sampleLocationsCount; - typed_memcpy(&state->dynamic.sample_location.locations[0], - pSampleLocationsInfo->pSampleLocations, pSampleLocationsInfo->sampleLocationsCount); + typed_memcpy(&state->dynamic.sample_location.locations[0], pSampleLocationsInfo->pSampleLocations, + pSampleLocationsInfo->sampleLocationsCount); state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS; } VKAPI_ATTR void VKAPI_CALL -radv_CmdSetLineStippleEXT(VkCommandBuffer commandBuffer, uint32_t lineStippleFactor, - uint16_t lineStipplePattern) +radv_CmdSetLineStippleEXT(VkCommandBuffer commandBuffer, uint32_t lineStippleFactor, uint16_t lineStipplePattern) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_cmd_state *state = &cmd_buffer->state; @@ -7140,15 +6821,13 @@ radv_CmdSetPrimitiveTopology(VkCommandBuffer commandBuffer, VkPrimitiveTopology } VKAPI_ATTR void VKAPI_CALL -radv_CmdSetViewportWithCount(VkCommandBuffer commandBuffer, uint32_t viewportCount, - const VkViewport *pViewports) +radv_CmdSetViewportWithCount(VkCommandBuffer commandBuffer, uint32_t viewportCount, const VkViewport *pViewports) { radv_CmdSetViewport(commandBuffer, 0, viewportCount, pViewports); } VKAPI_ATTR void VKAPI_CALL -radv_CmdSetScissorWithCount(VkCommandBuffer commandBuffer, uint32_t scissorCount, - const VkRect2D *pScissors) +radv_CmdSetScissorWithCount(VkCommandBuffer commandBuffer, uint32_t scissorCount, const VkRect2D *pScissors) { radv_CmdSetScissor(commandBuffer, 0, scissorCount, pScissors); } @@ -7210,9 +6889,8 @@ radv_CmdSetStencilTestEnable(VkCommandBuffer commandBuffer, VkBool32 stencilTest } VKAPI_ATTR void VKAPI_CALL -radv_CmdSetStencilOp(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, - VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp, - VkCompareOp compareOp) +radv_CmdSetStencilOp(VkCommandBuffer commandBuffer, 
VkStencilFaceFlags faceMask, VkStencilOp failOp, VkStencilOp passOp, + VkStencilOp depthFailOp, VkCompareOp compareOp) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_cmd_state *state = &cmd_buffer->state; @@ -7392,8 +7070,7 @@ radv_CmdSetVertexInputEXT(VkCommandBuffer commandBuffer, uint32_t vertexBindingD if (!(vtx_info->has_hw_format & BITFIELD_BIT(vtx_info->num_channels - 1))) vs_state->nontrivial_formats |= BITFIELD_BIT(loc); - if ((chip == GFX6 || chip >= GFX10) && - state->vbo_bound_mask & BITFIELD_BIT(attrib->binding)) { + if ((chip == GFX6 || chip >= GFX10) && state->vbo_bound_mask & BITFIELD_BIT(attrib->binding)) { if (binding->stride & align_req_minus_1) { state->vbo_misaligned_mask |= BITFIELD_BIT(loc); } else if ((cmd_buffer->vertex_bindings[attrib->binding].offset + vs_state->offsets[loc]) & @@ -7423,8 +7100,7 @@ radv_CmdSetPolygonModeEXT(VkCommandBuffer commandBuffer, VkPolygonMode polygonMo } VKAPI_ATTR void VKAPI_CALL -radv_CmdSetTessellationDomainOriginEXT(VkCommandBuffer commandBuffer, - VkTessellationDomainOrigin domainOrigin) +radv_CmdSetTessellationDomainOriginEXT(VkCommandBuffer commandBuffer, VkTessellationDomainOrigin domainOrigin) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_cmd_state *state = &cmd_buffer->state; @@ -7468,8 +7144,7 @@ radv_CmdSetAlphaToCoverageEnableEXT(VkCommandBuffer commandBuffer, VkBool32 alph } VKAPI_ATTR void VKAPI_CALL -radv_CmdSetSampleMaskEXT(VkCommandBuffer commandBuffer, VkSampleCountFlagBits samples, - const VkSampleMask *pSampleMask) +radv_CmdSetSampleMaskEXT(VkCommandBuffer commandBuffer, VkSampleCountFlagBits samples, const VkSampleMask *pSampleMask) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_cmd_state *state = &cmd_buffer->state; @@ -7514,8 +7189,7 @@ radv_CmdSetDepthClipNegativeOneToOneEXT(VkCommandBuffer commandBuffer, VkBool32 } VKAPI_ATTR void VKAPI_CALL -radv_CmdSetProvokingVertexModeEXT(VkCommandBuffer commandBuffer, - VkProvokingVertexModeEXT provokingVertexMode) +radv_CmdSetProvokingVertexModeEXT(VkCommandBuffer commandBuffer, VkProvokingVertexModeEXT provokingVertexMode) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_cmd_state *state = &cmd_buffer->state; @@ -7537,8 +7211,8 @@ radv_CmdSetDepthClampEnableEXT(VkCommandBuffer commandBuffer, VkBool32 depthClam } VKAPI_ATTR void VKAPI_CALL -radv_CmdSetColorWriteMaskEXT(VkCommandBuffer commandBuffer, uint32_t firstAttachment, - uint32_t attachmentCount, const VkColorComponentFlags *pColorWriteMasks) +radv_CmdSetColorWriteMaskEXT(VkCommandBuffer commandBuffer, uint32_t firstAttachment, uint32_t attachmentCount, + const VkColorComponentFlags *pColorWriteMasks) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_cmd_state *state = &cmd_buffer->state; @@ -7558,8 +7232,8 @@ radv_CmdSetColorWriteMaskEXT(VkCommandBuffer commandBuffer, uint32_t firstAttach } VKAPI_ATTR void VKAPI_CALL -radv_CmdSetColorBlendEnableEXT(VkCommandBuffer commandBuffer, uint32_t firstAttachment, - uint32_t attachmentCount, const VkBool32* pColorBlendEnables) +radv_CmdSetColorBlendEnableEXT(VkCommandBuffer commandBuffer, uint32_t firstAttachment, uint32_t attachmentCount, + const VkBool32 *pColorBlendEnables) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_cmd_state *state = &cmd_buffer->state; @@ -7576,8 +7250,7 @@ radv_CmdSetColorBlendEnableEXT(VkCommandBuffer commandBuffer, uint32_t firstAtta } VKAPI_ATTR void VKAPI_CALL 
-radv_CmdSetRasterizationSamplesEXT(VkCommandBuffer commandBuffer, - VkSampleCountFlagBits rasterizationSamples) +radv_CmdSetRasterizationSamplesEXT(VkCommandBuffer commandBuffer, VkSampleCountFlagBits rasterizationSamples) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_cmd_state *state = &cmd_buffer->state; @@ -7588,8 +7261,7 @@ radv_CmdSetRasterizationSamplesEXT(VkCommandBuffer commandBuffer, } VKAPI_ATTR void VKAPI_CALL -radv_CmdSetLineRasterizationModeEXT(VkCommandBuffer commandBuffer, - VkLineRasterizationModeEXT lineRasterizationMode) +radv_CmdSetLineRasterizationModeEXT(VkCommandBuffer commandBuffer, VkLineRasterizationModeEXT lineRasterizationMode) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_cmd_state *state = &cmd_buffer->state; @@ -7600,8 +7272,7 @@ radv_CmdSetLineRasterizationModeEXT(VkCommandBuffer commandBuffer, } VKAPI_ATTR void VKAPI_CALL -radv_CmdSetColorBlendEquationEXT(VkCommandBuffer commandBuffer, uint32_t firstAttachment, - uint32_t attachmentCount, +radv_CmdSetColorBlendEquationEXT(VkCommandBuffer commandBuffer, uint32_t firstAttachment, uint32_t attachmentCount, const VkColorBlendEquationEXT *pColorBlendEquations) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); @@ -7611,18 +7282,12 @@ radv_CmdSetColorBlendEquationEXT(VkCommandBuffer commandBuffer, uint32_t firstAt for (uint32_t i = 0; i < attachmentCount; i++) { unsigned idx = firstAttachment + i; - state->dynamic.vk.cb.attachments[idx].src_color_blend_factor = - pColorBlendEquations[i].srcColorBlendFactor; - state->dynamic.vk.cb.attachments[idx].dst_color_blend_factor = - pColorBlendEquations[i].dstColorBlendFactor; - state->dynamic.vk.cb.attachments[idx].color_blend_op = - pColorBlendEquations[i].colorBlendOp; - state->dynamic.vk.cb.attachments[idx].src_alpha_blend_factor = - pColorBlendEquations[i].srcAlphaBlendFactor; - state->dynamic.vk.cb.attachments[idx].dst_alpha_blend_factor = - pColorBlendEquations[i].dstAlphaBlendFactor; - state->dynamic.vk.cb.attachments[idx].alpha_blend_op = - pColorBlendEquations[i].alphaBlendOp; + state->dynamic.vk.cb.attachments[idx].src_color_blend_factor = pColorBlendEquations[i].srcColorBlendFactor; + state->dynamic.vk.cb.attachments[idx].dst_color_blend_factor = pColorBlendEquations[i].dstColorBlendFactor; + state->dynamic.vk.cb.attachments[idx].color_blend_op = pColorBlendEquations[i].colorBlendOp; + state->dynamic.vk.cb.attachments[idx].src_alpha_blend_factor = pColorBlendEquations[i].srcAlphaBlendFactor; + state->dynamic.vk.cb.attachments[idx].dst_alpha_blend_factor = pColorBlendEquations[i].dstAlphaBlendFactor; + state->dynamic.vk.cb.attachments[idx].alpha_blend_op = pColorBlendEquations[i].alphaBlendOp; } state->dirty |= RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_EQUATION; @@ -7652,8 +7317,7 @@ radv_CmdSetDiscardRectangleEnableEXT(VkCommandBuffer commandBuffer, VkBool32 dis } VKAPI_ATTR void VKAPI_CALL -radv_CmdSetDiscardRectangleModeEXT(VkCommandBuffer commandBuffer, - VkDiscardRectangleModeEXT discardRectangleMode) +radv_CmdSetDiscardRectangleModeEXT(VkCommandBuffer commandBuffer, VkDiscardRectangleModeEXT discardRectangleMode) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_cmd_state *state = &cmd_buffer->state; @@ -7664,8 +7328,7 @@ radv_CmdSetDiscardRectangleModeEXT(VkCommandBuffer commandBuffer, } VKAPI_ATTR void VKAPI_CALL -radv_CmdSetAttachmentFeedbackLoopEnableEXT(VkCommandBuffer commandBuffer, - VkImageAspectFlags aspectMask) 
+radv_CmdSetAttachmentFeedbackLoopEnableEXT(VkCommandBuffer commandBuffer, VkImageAspectFlags aspectMask) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_cmd_state *state = &cmd_buffer->state; @@ -7676,8 +7339,7 @@ radv_CmdSetAttachmentFeedbackLoopEnableEXT(VkCommandBuffer commandBuffer, } VKAPI_ATTR void VKAPI_CALL -radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, - const VkCommandBuffer *pCmdBuffers) +radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, const VkCommandBuffer *pCmdBuffers) { RADV_FROM_HANDLE(radv_cmd_buffer, primary, commandBuffer); @@ -7698,16 +7360,13 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou * DRAW_{INDEX}_INDIRECT_{MULTI} on GFX6-7 because it's illegal and hangs the GPU. */ const bool allow_ib2 = - !secondary->state.uses_draw_indirect || - secondary->device->physical_device->rad_info.gfx_level >= GFX8; + !secondary->state.uses_draw_indirect || secondary->device->physical_device->rad_info.gfx_level >= GFX8; primary->scratch_size_per_wave_needed = MAX2(primary->scratch_size_per_wave_needed, secondary->scratch_size_per_wave_needed); - primary->scratch_waves_wanted = - MAX2(primary->scratch_waves_wanted, secondary->scratch_waves_wanted); + primary->scratch_waves_wanted = MAX2(primary->scratch_waves_wanted, secondary->scratch_waves_wanted); primary->compute_scratch_size_per_wave_needed = - MAX2(primary->compute_scratch_size_per_wave_needed, - secondary->compute_scratch_size_per_wave_needed); + MAX2(primary->compute_scratch_size_per_wave_needed, secondary->compute_scratch_size_per_wave_needed); primary->compute_scratch_waves_wanted = MAX2(primary->compute_scratch_waves_wanted, secondary->compute_scratch_waves_wanted); @@ -7822,9 +7481,8 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou /* After executing commands from secondary buffers we have to dirty * some states. 
*/ - primary->state.dirty |= RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_INDEX_BUFFER | - RADV_CMD_DIRTY_GUARDBAND | RADV_CMD_DIRTY_DYNAMIC_ALL | - RADV_CMD_DIRTY_NGG_QUERY | RADV_CMD_DIRTY_OCCLUSION_QUERY; + primary->state.dirty |= RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_GUARDBAND | + RADV_CMD_DIRTY_DYNAMIC_ALL | RADV_CMD_DIRTY_NGG_QUERY | RADV_CMD_DIRTY_OCCLUSION_QUERY; radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_GRAPHICS); radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_COMPUTE); @@ -7859,8 +7517,7 @@ static VkImageLayout attachment_initial_layout(const VkRenderingAttachmentInfo *att) { const VkRenderingAttachmentInitialLayoutInfoMESA *layout_info = - vk_find_struct_const(att->pNext, - RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA); + vk_find_struct_const(att->pNext, RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA); if (layout_info != NULL) return layout_info->initialLayout; @@ -7875,15 +7532,16 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe const struct VkSampleLocationsInfoEXT *sample_locs_info = vk_find_struct_const(pRenderingInfo->pNext, SAMPLE_LOCATIONS_INFO_EXT); - struct radv_sample_locations_state sample_locations = { .count = 0, }; + struct radv_sample_locations_state sample_locations = { + .count = 0, + }; if (sample_locs_info) { - sample_locations = (struct radv_sample_locations_state) { + sample_locations = (struct radv_sample_locations_state){ .per_pixel = sample_locs_info->sampleLocationsPerPixel, .grid_size = sample_locs_info->sampleLocationGridSize, .count = sample_locs_info->sampleLocationsCount, }; - typed_memcpy(sample_locations.locations, - sample_locs_info->pSampleLocations, + typed_memcpy(sample_locations.locations, sample_locs_info->pSampleLocations, sample_locs_info->sampleLocationsCount); } @@ -7898,10 +7556,9 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe uint32_t color_samples = 0, ds_samples = 0; struct radv_attachment color_att[MAX_RTS]; for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) { - const VkRenderingAttachmentInfo *att_info = - &pRenderingInfo->pColorAttachments[i]; + const VkRenderingAttachmentInfo *att_info = &pRenderingInfo->pColorAttachments[i]; - color_att[i] = (struct radv_attachment) { .iview = NULL }; + color_att[i] = (struct radv_attachment){.iview = NULL}; if (att_info->imageView == VK_NULL_HANDLE) continue; @@ -7911,11 +7568,9 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe color_att[i].layout = att_info->imageLayout; radv_initialise_color_surface(cmd_buffer->device, &color_att[i].cb, iview); - if (att_info->resolveMode != VK_RESOLVE_MODE_NONE && - att_info->resolveImageView != VK_NULL_HANDLE) { + if (att_info->resolveMode != VK_RESOLVE_MODE_NONE && att_info->resolveImageView != VK_NULL_HANDLE) { color_att[i].resolve_mode = att_info->resolveMode; - color_att[i].resolve_iview = - radv_image_view_from_handle(att_info->resolveImageView); + color_att[i].resolve_iview = radv_image_view_from_handle(att_info->resolveImageView); color_att[i].resolve_layout = att_info->resolveImageLayout; } @@ -7924,16 +7579,13 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe VkImageLayout initial_layout = attachment_initial_layout(att_info); if (initial_layout != color_att[i].layout) { assert(!(pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT)); - radv_handle_rendering_image_transition(cmd_buffer, color_att[i].iview, - 
pRenderingInfo->layerCount, - pRenderingInfo->viewMask, - initial_layout, VK_IMAGE_LAYOUT_UNDEFINED, - color_att[i].layout, VK_IMAGE_LAYOUT_UNDEFINED, - &sample_locations); + radv_handle_rendering_image_transition(cmd_buffer, color_att[i].iview, pRenderingInfo->layerCount, + pRenderingInfo->viewMask, initial_layout, VK_IMAGE_LAYOUT_UNDEFINED, + color_att[i].layout, VK_IMAGE_LAYOUT_UNDEFINED, &sample_locations); } } - struct radv_attachment ds_att = { .iview = NULL }; + struct radv_attachment ds_att = {.iview = NULL}; const VkRenderingAttachmentInfo *d_att_info = pRenderingInfo->pDepthAttachment; const VkRenderingAttachmentInfo *s_att_info = pRenderingInfo->pStencilAttachment; if ((d_att_info != NULL && d_att_info->imageView != VK_NULL_HANDLE) || @@ -7948,8 +7600,7 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe initial_depth_layout = attachment_initial_layout(d_att_info); ds_att.layout = d_att_info->imageLayout; - if (d_att_info->resolveMode != VK_RESOLVE_MODE_NONE && - d_att_info->resolveImageView != VK_NULL_HANDLE) { + if (d_att_info->resolveMode != VK_RESOLVE_MODE_NONE && d_att_info->resolveImageView != VK_NULL_HANDLE) { d_res_iview = radv_image_view_from_handle(d_att_info->resolveImageView); ds_att.resolve_mode = d_att_info->resolveMode; ds_att.resolve_layout = d_att_info->resolveImageLayout; @@ -7961,8 +7612,7 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe initial_stencil_layout = attachment_initial_layout(s_att_info); ds_att.stencil_layout = s_att_info->imageLayout; - if (s_att_info->resolveMode != VK_RESOLVE_MODE_NONE && - s_att_info->resolveImageView != VK_NULL_HANDLE) { + if (s_att_info->resolveMode != VK_RESOLVE_MODE_NONE && s_att_info->resolveImageView != VK_NULL_HANDLE) { s_res_iview = radv_image_view_from_handle(s_att_info->resolveImageView); ds_att.stencil_resolve_mode = s_att_info->resolveMode; ds_att.stencil_resolve_layout = s_att_info->resolveImageLayout; @@ -7970,8 +7620,7 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe } assert(d_iview == NULL || s_iview == NULL || d_iview == s_iview); - ds_att.iview = d_iview ? d_iview : s_iview, - ds_att.format = ds_att.iview->vk.format; + ds_att.iview = d_iview ? 
d_iview : s_iview, ds_att.format = ds_att.iview->vk.format; radv_initialise_ds_surface(cmd_buffer->device, &ds_att.ds, ds_att.iview); assert(d_res_iview == NULL || s_res_iview == NULL || d_res_iview == s_res_iview); @@ -7979,28 +7628,23 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe ds_samples = ds_att.iview->vk.image->samples; - if (initial_depth_layout != ds_att.layout || - initial_stencil_layout != ds_att.stencil_layout) { + if (initial_depth_layout != ds_att.layout || initial_stencil_layout != ds_att.stencil_layout) { assert(!(pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT)); - radv_handle_rendering_image_transition(cmd_buffer, ds_att.iview, - pRenderingInfo->layerCount, - pRenderingInfo->viewMask, - initial_depth_layout, initial_stencil_layout, - ds_att.layout, ds_att.stencil_layout, - &sample_locations); + radv_handle_rendering_image_transition(cmd_buffer, ds_att.iview, pRenderingInfo->layerCount, + pRenderingInfo->viewMask, initial_depth_layout, initial_stencil_layout, + ds_att.layout, ds_att.stencil_layout, &sample_locations); } } if (cmd_buffer->vk.render_pass) radv_describe_barrier_end(cmd_buffer); const VkRenderingFragmentShadingRateAttachmentInfoKHR *fsr_info = - vk_find_struct_const(pRenderingInfo->pNext, - RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR); - struct radv_attachment vrs_att = { .iview = NULL }; - VkExtent2D vrs_texel_size = { .width = 0 }; + vk_find_struct_const(pRenderingInfo->pNext, RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR); + struct radv_attachment vrs_att = {.iview = NULL}; + VkExtent2D vrs_texel_size = {.width = 0}; if (fsr_info && fsr_info->imageView) { VK_FROM_HANDLE(radv_image_view, iview, fsr_info->imageView); - vrs_att = (struct radv_attachment) { + vrs_att = (struct radv_attachment){ .format = iview->vk.format, .iview = iview, .layout = fsr_info->imageLayout, @@ -8054,8 +7698,7 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe render->area.offset.x + render->area.extent.height <= ds_image->vk.extent.height); /* Copy the VRS rates to the HTILE buffer. */ - radv_copy_vrs_htile(cmd_buffer, render->vrs_att.iview->image, &render->area, ds_image, - &htile_buffer, true); + radv_copy_vrs_htile(cmd_buffer, render->vrs_att.iview->image, &render->area, ds_image, &htile_buffer, true); radv_buffer_finish(&htile_buffer); } else { @@ -8065,7 +7708,7 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe struct radv_image *ds_image = radv_cmd_buffer_get_vrs_image(cmd_buffer); if (ds_image && render->area.offset.x < ds_image->vk.extent.width && - render->area.offset.y < ds_image->vk.extent.height) { + render->area.offset.y < ds_image->vk.extent.height) { /* HTILE buffer */ struct radv_buffer *htile_buffer = cmd_buffer->device->vrs.buffer; @@ -8074,19 +7717,17 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe area.extent.height = MIN2(area.extent.height, ds_image->vk.extent.height - area.offset.y); /* Copy the VRS rates to the HTILE buffer. 
*/ - radv_copy_vrs_htile(cmd_buffer, render->vrs_att.iview->image, &area, ds_image, - htile_buffer, false); + radv_copy_vrs_htile(cmd_buffer, render->vrs_att.iview->image, &area, ds_image, htile_buffer, false); } } } radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 6); radeon_set_context_reg(cmd_buffer->cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, - S_028204_TL_X(render->area.offset.x) | - S_028204_TL_Y(render->area.offset.y)); + S_028204_TL_X(render->area.offset.x) | S_028204_TL_Y(render->area.offset.y)); radeon_set_context_reg(cmd_buffer->cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, S_028208_BR_X(render->area.offset.x + render->area.extent.width) | - S_028208_BR_Y(render->area.offset.y + render->area.extent.height)); + S_028208_BR_Y(render->area.offset.y + render->area.extent.height)); if (!(pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT)) radv_cmd_buffer_clear_rendering(cmd_buffer, pRenderingInfo); @@ -8103,8 +7744,8 @@ radv_CmdEndRendering(VkCommandBuffer commandBuffer) } static void -radv_emit_view_index_per_stage(struct radeon_cmdbuf *cs, const struct radv_shader *shader, - uint32_t base_reg, unsigned index) +radv_emit_view_index_per_stage(struct radeon_cmdbuf *cs, const struct radv_shader *shader, uint32_t base_reg, + unsigned index) { const struct radv_userdata_info *loc = radv_get_user_sgpr(shader, AC_UD_VIEW_INDEX); @@ -8119,22 +7760,20 @@ radv_emit_view_index(struct radv_cmd_buffer *cmd_buffer, unsigned index) { struct radeon_cmdbuf *cs = cmd_buffer->cs; - radv_foreach_stage(stage, cmd_buffer->state.active_stages & ~VK_SHADER_STAGE_TASK_BIT_EXT) { + radv_foreach_stage(stage, cmd_buffer->state.active_stages & ~VK_SHADER_STAGE_TASK_BIT_EXT) + { const struct radv_shader *shader = radv_get_shader(cmd_buffer->state.shaders, stage); radv_emit_view_index_per_stage(cs, shader, shader->info.user_data_0, index); } if (cmd_buffer->state.gs_copy_shader) { - radv_emit_view_index_per_stage(cs, cmd_buffer->state.gs_copy_shader, - R_00B130_SPI_SHADER_USER_DATA_VS_0, index); + radv_emit_view_index_per_stage(cs, cmd_buffer->state.gs_copy_shader, R_00B130_SPI_SHADER_USER_DATA_VS_0, index); } if (cmd_buffer->state.active_stages & VK_SHADER_STAGE_TASK_BIT_EXT) { - radv_emit_view_index_per_stage(cmd_buffer->gang.cs, - cmd_buffer->state.shaders[MESA_SHADER_TASK], - cmd_buffer->state.shaders[MESA_SHADER_TASK]->info.user_data_0, - index); + radv_emit_view_index_per_stage(cmd_buffer->gang.cs, cmd_buffer->state.shaders[MESA_SHADER_TASK], + cmd_buffer->state.shaders[MESA_SHADER_TASK]->info.user_data_0, index); } } @@ -8147,8 +7786,8 @@ radv_emit_view_index(struct radv_cmd_buffer *cmd_buffer, unsigned index) * space in the upload BO and emit some packets to invert the condition. */ static void -radv_cs_emit_compute_predication(struct radv_cmd_state *state, struct radeon_cmdbuf *cs, - uint64_t inv_va, bool *inv_emitted, unsigned dwords) +radv_cs_emit_compute_predication(struct radv_cmd_state *state, struct radeon_cmdbuf *cs, uint64_t inv_va, + bool *inv_emitted, unsigned dwords) { if (!state->predicating) return; @@ -8162,8 +7801,8 @@ radv_cs_emit_compute_predication(struct radv_cmd_state *state, struct radeon_cmd /* Write 1 to the inverted predication VA. 
*/ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | - COPY_DATA_WR_CONFIRM); + radeon_emit(cs, + COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM); radeon_emit(cs, 1); radeon_emit(cs, 0); radeon_emit(cs, inv_va); @@ -8178,8 +7817,8 @@ radv_cs_emit_compute_predication(struct radv_cmd_state *state, struct radeon_cmd /* Write 0 to the new predication VA (when the API condition != 0) */ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | - COPY_DATA_WR_CONFIRM); + radeon_emit(cs, + COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM); radeon_emit(cs, 0); radeon_emit(cs, 0); radeon_emit(cs, inv_va); @@ -8192,13 +7831,12 @@ radv_cs_emit_compute_predication(struct radv_cmd_state *state, struct radeon_cmd radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0)); radeon_emit(cs, va); radeon_emit(cs, va >> 32); - radeon_emit(cs, 0); /* Cache policy */ + radeon_emit(cs, 0); /* Cache policy */ radeon_emit(cs, dwords); /* Size of the predicated packet(s) in DWORDs. */ } static void -radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t vertex_count, - uint32_t use_opaque) +radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t vertex_count, uint32_t use_opaque) { radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, cmd_buffer->state.predicating)); radeon_emit(cmd_buffer->cs, vertex_count); @@ -8213,8 +7851,8 @@ radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t vertex_cou * Hardware uses this information to return 0 for out-of-bounds reads. */ static void -radv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t index_va, - uint32_t max_index_count, uint32_t index_count, bool not_eop) +radv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t index_va, uint32_t max_index_count, + uint32_t index_count, bool not_eop) { radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_2, 4, cmd_buffer->state.predicating)); radeon_emit(cmd_buffer->cs, max_index_count); @@ -8230,8 +7868,8 @@ radv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t in /* MUST inline this function to avoid massive perf loss in drawoverhead */ ALWAYS_INLINE static void -radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool indexed, - uint32_t draw_count, uint64_t count_va, uint32_t stride) +radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool indexed, uint32_t draw_count, + uint64_t count_va, uint32_t stride) { struct radeon_cmdbuf *cs = cmd_buffer->cs; const unsigned di_src_sel = indexed ? V_0287F0_DI_SRC_SEL_DMA : V_0287F0_DI_SRC_SEL_AUTO_INDEX; @@ -8255,20 +7893,17 @@ radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool index draw_id_reg = ((base_reg + mesh * 12 + 4) - SI_SH_REG_OFFSET) >> 2; if (draw_count == 1 && !count_va && !draw_id_enable) { - radeon_emit(cs, - PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT : PKT3_DRAW_INDIRECT, 3, predicating)); + radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT : PKT3_DRAW_INDIRECT, 3, predicating)); radeon_emit(cs, 0); radeon_emit(cs, vertex_offset_reg); radeon_emit(cs, start_instance_reg); radeon_emit(cs, di_src_sel); } else { - radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI : PKT3_DRAW_INDIRECT_MULTI, 8, - predicating)); + radeon_emit(cs, PKT3(indexed ? 
PKT3_DRAW_INDEX_INDIRECT_MULTI : PKT3_DRAW_INDIRECT_MULTI, 8, predicating)); radeon_emit(cs, 0); radeon_emit(cs, vertex_offset_reg); radeon_emit(cs, start_instance_reg); - radeon_emit(cs, draw_id_reg | S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) | - S_2C3_COUNT_INDIRECT_ENABLE(!!count_va)); + radeon_emit(cs, draw_id_reg | S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) | S_2C3_COUNT_INDIRECT_ENABLE(!!count_va)); radeon_emit(cs, draw_count); /* count */ radeon_emit(cs, count_va); /* count_addr */ radeon_emit(cs, count_va >> 32); @@ -8280,8 +7915,8 @@ radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool index } ALWAYS_INLINE static void -radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t draw_count, - uint64_t count_va, uint32_t stride) +radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t draw_count, uint64_t count_va, + uint32_t stride) { struct radeon_cmdbuf *cs = cmd_buffer->cs; uint32_t base_reg = cmd_buffer->state.vtx_base_sgpr; @@ -8305,12 +7940,10 @@ radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint3 radeon_emit(cs, 0); /* data_offset */ radeon_emit(cs, S_4C1_XYZ_DIM_REG(xyz_dim_reg) | S_4C1_DRAW_INDEX_REG(draw_id_reg)); if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) - radeon_emit(cs, S_4C2_DRAW_INDEX_ENABLE(draw_id_enable) | - S_4C2_COUNT_INDIRECT_ENABLE(!!count_va) | + radeon_emit(cs, S_4C2_DRAW_INDEX_ENABLE(draw_id_enable) | S_4C2_COUNT_INDIRECT_ENABLE(!!count_va) | S_4C2_XYZ_DIM_ENABLE(xyz_dim_enable) | S_4C2_MODE1_ENABLE(mode1_enable)); else - radeon_emit( - cs, S_4C2_DRAW_INDEX_ENABLE(draw_id_enable) | S_4C2_COUNT_INDIRECT_ENABLE(!!count_va)); + radeon_emit(cs, S_4C2_DRAW_INDEX_ENABLE(draw_id_enable) | S_4C2_COUNT_INDIRECT_ENABLE(!!count_va)); radeon_emit(cs, draw_count); radeon_emit(cs, count_va & 0xFFFFFFFF); radeon_emit(cs, count_va >> 32); @@ -8319,22 +7952,19 @@ radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint3 } ALWAYS_INLINE static void -radv_cs_emit_dispatch_taskmesh_direct_ace_packet(struct radv_cmd_buffer *cmd_buffer, - const uint32_t x, const uint32_t y, +radv_cs_emit_dispatch_taskmesh_direct_ace_packet(struct radv_cmd_buffer *cmd_buffer, const uint32_t x, const uint32_t y, const uint32_t z) { struct radv_shader *task_shader = cmd_buffer->state.shaders[MESA_SHADER_TASK]; struct radeon_cmdbuf *cs = cmd_buffer->gang.cs; const bool predicating = cmd_buffer->state.predicating; - const uint32_t dispatch_initiator = cmd_buffer->device->dispatch_initiator_task | - S_00B800_CS_W32_EN(task_shader->info.wave_size == 32); + const uint32_t dispatch_initiator = + cmd_buffer->device->dispatch_initiator_task | S_00B800_CS_W32_EN(task_shader->info.wave_size == 32); - const struct radv_userdata_info *ring_entry_loc = - radv_get_user_sgpr(task_shader, AC_UD_TASK_RING_ENTRY); + const struct radv_userdata_info *ring_entry_loc = radv_get_user_sgpr(task_shader, AC_UD_TASK_RING_ENTRY); assert(ring_entry_loc && ring_entry_loc->sgpr_idx != -1 && ring_entry_loc->num_sgprs == 1); - uint32_t ring_entry_reg = - (R_00B900_COMPUTE_USER_DATA_0 + ring_entry_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2; + uint32_t ring_entry_reg = (R_00B900_COMPUTE_USER_DATA_0 + ring_entry_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2; radeon_emit(cs, PKT3(PKT3_DISPATCH_TASKMESH_DIRECT_ACE, 4, predicating) | PKT3_SHADER_TYPE_S(1)); radeon_emit(cs, x); @@ -8345,9 +7975,8 @@ radv_cs_emit_dispatch_taskmesh_direct_ace_packet(struct radv_cmd_buffer *cmd_buf } 
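The *_reg values in these draw and dispatch packets are DWORD indices into the SH register aperture, derived from the shader's user-SGPR slot. A small sketch of that arithmetic, using placeholder constants instead of the real R_00B900_*/SI_SH_REG_OFFSET definitions:

```c
/* Illustrative only: turning a user-SGPR slot into the DWORD register index
 * that the indirect/taskmesh packets expect. The two constants are
 * placeholders, not the real register definitions. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define SH_REG_OFFSET       0x2C00u /* placeholder: start of the SH register range */
#define COMPUTE_USER_DATA_0 0xB900u /* placeholder: byte address of user-data reg 0 */

static uint32_t user_sgpr_to_reg_index(int sgpr_idx)
{
   assert(sgpr_idx >= 0);
   /* User-data registers are 4 bytes apart; packets take a DWORD index
    * relative to the SH aperture, hence the subtraction and >> 2. */
   const uint32_t byte_addr = COMPUTE_USER_DATA_0 + (uint32_t)sgpr_idx * 4;
   return (byte_addr - SH_REG_OFFSET) >> 2;
}

int main(void)
{
   printf("ring entry reg index: %u\n", user_sgpr_to_reg_index(2));
   return 0;
}
```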
ALWAYS_INLINE static void -radv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(struct radv_cmd_buffer *cmd_buffer, - uint64_t data_va, uint32_t draw_count, - uint64_t count_va, uint32_t stride) +radv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t data_va, + uint32_t draw_count, uint64_t count_va, uint32_t stride) { assert((data_va & 0x03) == 0); assert((count_va & 0x03) == 0); @@ -8357,15 +7986,12 @@ radv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(struct radv_cmd_buffer const uint32_t xyz_dim_enable = task_shader->info.cs.uses_grid_size; const uint32_t draw_id_enable = task_shader->info.vs.needs_draw_id; - const uint32_t dispatch_initiator = cmd_buffer->device->dispatch_initiator_task | - S_00B800_CS_W32_EN(task_shader->info.wave_size == 32); + const uint32_t dispatch_initiator = + cmd_buffer->device->dispatch_initiator_task | S_00B800_CS_W32_EN(task_shader->info.wave_size == 32); - const struct radv_userdata_info *ring_entry_loc = - radv_get_user_sgpr(task_shader, AC_UD_TASK_RING_ENTRY); - const struct radv_userdata_info *xyz_dim_loc = - radv_get_user_sgpr(task_shader, AC_UD_CS_GRID_SIZE); - const struct radv_userdata_info *draw_id_loc = - radv_get_user_sgpr(task_shader, AC_UD_CS_TASK_DRAW_ID); + const struct radv_userdata_info *ring_entry_loc = radv_get_user_sgpr(task_shader, AC_UD_TASK_RING_ENTRY); + const struct radv_userdata_info *xyz_dim_loc = radv_get_user_sgpr(task_shader, AC_UD_CS_GRID_SIZE); + const struct radv_userdata_info *draw_id_loc = radv_get_user_sgpr(task_shader, AC_UD_CS_TASK_DRAW_ID); assert(ring_entry_loc->sgpr_idx != -1 && ring_entry_loc->num_sgprs == 1); assert(!xyz_dim_enable || (xyz_dim_loc->sgpr_idx != -1 && xyz_dim_loc->num_sgprs == 3)); @@ -8374,20 +8000,15 @@ radv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(struct radv_cmd_buffer const uint32_t ring_entry_reg = (R_00B900_COMPUTE_USER_DATA_0 + ring_entry_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2; const uint32_t xyz_dim_reg = - !xyz_dim_enable - ? 0 - : (R_00B900_COMPUTE_USER_DATA_0 + xyz_dim_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2; + !xyz_dim_enable ? 0 : (R_00B900_COMPUTE_USER_DATA_0 + xyz_dim_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2; const uint32_t draw_id_reg = - !draw_id_enable - ? 0 - : (R_00B900_COMPUTE_USER_DATA_0 + draw_id_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2; + !draw_id_enable ? 
0 : (R_00B900_COMPUTE_USER_DATA_0 + draw_id_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2; radeon_emit(cs, PKT3(PKT3_DISPATCH_TASKMESH_INDIRECT_MULTI_ACE, 9, 0) | PKT3_SHADER_TYPE_S(1)); radeon_emit(cs, data_va); radeon_emit(cs, data_va >> 32); radeon_emit(cs, S_AD2_RING_ENTRY_REG(ring_entry_reg)); - radeon_emit(cs, S_AD3_COUNT_INDIRECT_ENABLE(!!count_va) | - S_AD3_DRAW_INDEX_ENABLE(draw_id_enable) | + radeon_emit(cs, S_AD3_COUNT_INDIRECT_ENABLE(!!count_va) | S_AD3_DRAW_INDEX_ENABLE(draw_id_enable) | S_AD3_XYZ_DIM_ENABLE(xyz_dim_enable) | S_AD3_DRAW_INDEX_REG(draw_id_reg)); radeon_emit(cs, S_AD4_XYZ_DIM_REG(xyz_dim_reg)); radeon_emit(cs, draw_count); @@ -8413,8 +8034,7 @@ radv_cs_emit_dispatch_taskmesh_gfx_packet(struct radv_cmd_buffer *cmd_buffer) uint32_t ring_entry_reg = ((base_reg + ring_entry_loc->sgpr_idx * 4) - SI_SH_REG_OFFSET) >> 2; uint32_t xyz_dim_en = 1; /* TODO: disable XYZ_DIM when unneeded */ uint32_t mode1_en = 1; /* legacy fast launch mode */ - uint32_t linear_dispatch_en = - cmd_buffer->state.shaders[MESA_SHADER_TASK]->info.cs.linear_taskmesh_dispatch; + uint32_t linear_dispatch_en = cmd_buffer->state.shaders[MESA_SHADER_TASK]->info.cs.linear_taskmesh_dispatch; radeon_emit(cs, PKT3(PKT3_DISPATCH_TASKMESH_GFX, 2, predicating) | PKT3_RESET_FILTER_CAM(1)); radeon_emit(cs, S_4D0_RING_ENTRY_REG(ring_entry_reg) | S_4D0_XYZ_DIM_REG(xyz_dim_reg)); @@ -8427,8 +8047,8 @@ radv_cs_emit_dispatch_taskmesh_gfx_packet(struct radv_cmd_buffer *cmd_buffer) } ALWAYS_INLINE static void -radv_emit_userdata_vertex_internal(struct radv_cmd_buffer *cmd_buffer, - const struct radv_draw_info *info, const uint32_t vertex_offset) +radv_emit_userdata_vertex_internal(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, + const uint32_t vertex_offset) { struct radv_cmd_state *state = &cmd_buffer->state; struct radeon_cmdbuf *cs = cmd_buffer->cs; @@ -8475,12 +8095,10 @@ radv_emit_userdata_vertex_drawid(struct radv_cmd_buffer *cmd_buffer, uint32_t ve state->last_vertex_offset = vertex_offset; if (drawid) radeon_emit(cs, drawid); - } ALWAYS_INLINE static void -radv_emit_userdata_mesh(struct radv_cmd_buffer *cmd_buffer, - const uint32_t x, const uint32_t y, const uint32_t z) +radv_emit_userdata_mesh(struct radv_cmd_buffer *cmd_buffer, const uint32_t x, const uint32_t y, const uint32_t z) { struct radv_cmd_state *state = &cmd_buffer->state; struct radeon_cmdbuf *cs = cmd_buffer->cs; @@ -8498,15 +8116,13 @@ radv_emit_userdata_mesh(struct radv_cmd_buffer *cmd_buffer, } ALWAYS_INLINE static void -radv_emit_userdata_task(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z, - uint32_t draw_id) +radv_emit_userdata_task(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z, uint32_t draw_id) { struct radv_shader *task_shader = cmd_buffer->state.shaders[MESA_SHADER_TASK]; struct radeon_cmdbuf *cs = cmd_buffer->gang.cs; const struct radv_userdata_info *xyz_loc = radv_get_user_sgpr(task_shader, AC_UD_CS_GRID_SIZE); - const struct radv_userdata_info *draw_id_loc = - radv_get_user_sgpr(task_shader, AC_UD_CS_TASK_DRAW_ID); + const struct radv_userdata_info *draw_id_loc = radv_get_user_sgpr(task_shader, AC_UD_CS_TASK_DRAW_ID); if (xyz_loc->sgpr_idx != -1) { assert(xyz_loc->num_sgprs == 3); @@ -8531,8 +8147,7 @@ radv_emit_userdata_task(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t * which requires 0 for out-of-bounds access. 
*/ static void -radv_handle_zero_index_buffer_bug(struct radv_cmd_buffer *cmd_buffer, uint64_t *index_va, - uint32_t *remaining_indexes) +radv_handle_zero_index_buffer_bug(struct radv_cmd_buffer *cmd_buffer, uint64_t *index_va, uint32_t *remaining_indexes) { const uint32_t zero = 0; uint32_t offset; @@ -8547,10 +8162,8 @@ radv_handle_zero_index_buffer_bug(struct radv_cmd_buffer *cmd_buffer, uint64_t * } ALWAYS_INLINE static void -radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, - const struct radv_draw_info *info, - uint32_t drawCount, const VkMultiDrawIndexedInfoEXT *minfo, - uint32_t stride, +radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, + uint32_t drawCount, const VkMultiDrawIndexedInfoEXT *minfo, uint32_t stride, const int32_t *vertexOffset) { @@ -8559,19 +8172,17 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, const int index_size = radv_get_vgt_index_size(state->index_type); unsigned i = 0; const bool uses_drawid = state->uses_drawid; - const bool can_eop = - !uses_drawid && cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10; + const bool can_eop = !uses_drawid && cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10; if (uses_drawid) { if (vertexOffset) { radv_emit_userdata_vertex(cmd_buffer, info, *vertexOffset); - vk_foreach_multi_draw_indexed(draw, i, minfo, drawCount, stride) { + vk_foreach_multi_draw_indexed (draw, i, minfo, drawCount, stride) { uint32_t remaining_indexes = MAX2(state->max_index_count, draw->firstIndex) - draw->firstIndex; uint64_t index_va = state->index_va + draw->firstIndex * index_size; /* Handle draw calls with 0-sized index buffers if the GPU can't support them. */ - if (!remaining_indexes && - cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug) + if (!remaining_indexes && cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug) radv_handle_zero_index_buffer_bug(cmd_buffer, &index_va, &remaining_indexes); if (i > 0) @@ -8580,7 +8191,7 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, if (!state->render.view_mask) { radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false); } else { - u_foreach_bit(view, state->render.view_mask) { + u_foreach_bit (view, state->render.view_mask) { radv_emit_view_index(cmd_buffer, view); radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false); @@ -8588,13 +8199,12 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, } } } else { - vk_foreach_multi_draw_indexed(draw, i, minfo, drawCount, stride) { + vk_foreach_multi_draw_indexed (draw, i, minfo, drawCount, stride) { uint32_t remaining_indexes = MAX2(state->max_index_count, draw->firstIndex) - draw->firstIndex; uint64_t index_va = state->index_va + draw->firstIndex * index_size; /* Handle draw calls with 0-sized index buffers if the GPU can't support them. 
*/ - if (!remaining_indexes && - cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug) + if (!remaining_indexes && cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug) radv_handle_zero_index_buffer_bug(cmd_buffer, &index_va, &remaining_indexes); if (i > 0) { @@ -8609,7 +8219,7 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, if (!state->render.view_mask) { radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false); } else { - u_foreach_bit(view, state->render.view_mask) { + u_foreach_bit (view, state->render.view_mask) { radv_emit_view_index(cmd_buffer, view); radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false); @@ -8627,7 +8237,8 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, * count == 0 for the last draw that doesn't have NOT_EOP. */ while (drawCount > 1) { - const VkMultiDrawIndexedInfoEXT *last = (const VkMultiDrawIndexedInfoEXT*)(((const uint8_t*)minfo) + (drawCount - 1) * stride); + const VkMultiDrawIndexedInfoEXT *last = + (const VkMultiDrawIndexedInfoEXT *)(((const uint8_t *)minfo) + (drawCount - 1) * stride); if (last->indexCount) break; drawCount--; @@ -8635,19 +8246,19 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, } radv_emit_userdata_vertex(cmd_buffer, info, *vertexOffset); - vk_foreach_multi_draw_indexed(draw, i, minfo, drawCount, stride) { + vk_foreach_multi_draw_indexed (draw, i, minfo, drawCount, stride) { uint32_t remaining_indexes = MAX2(state->max_index_count, draw->firstIndex) - draw->firstIndex; uint64_t index_va = state->index_va + draw->firstIndex * index_size; /* Handle draw calls with 0-sized index buffers if the GPU can't support them. */ - if (!remaining_indexes && - cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug) + if (!remaining_indexes && cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug) radv_handle_zero_index_buffer_bug(cmd_buffer, &index_va, &remaining_indexes); if (!state->render.view_mask) { - radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, can_eop && i < drawCount - 1); + radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, + can_eop && i < drawCount - 1); } else { - u_foreach_bit(view, state->render.view_mask) { + u_foreach_bit (view, state->render.view_mask) { radv_emit_view_index(cmd_buffer, view); radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false); @@ -8655,23 +8266,24 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, } } } else { - vk_foreach_multi_draw_indexed(draw, i, minfo, drawCount, stride) { + vk_foreach_multi_draw_indexed (draw, i, minfo, drawCount, stride) { uint32_t remaining_indexes = MAX2(state->max_index_count, draw->firstIndex) - draw->firstIndex; uint64_t index_va = state->index_va + draw->firstIndex * index_size; /* Handle draw calls with 0-sized index buffers if the GPU can't support them. */ - if (!remaining_indexes && - cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug) + if (!remaining_indexes && cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug) radv_handle_zero_index_buffer_bug(cmd_buffer, &index_va, &remaining_indexes); - const VkMultiDrawIndexedInfoEXT *next = (const VkMultiDrawIndexedInfoEXT*)(i < drawCount - 1 ? 
((uint8_t*)draw + stride) : NULL); + const VkMultiDrawIndexedInfoEXT *next = + (const VkMultiDrawIndexedInfoEXT *)(i < drawCount - 1 ? ((uint8_t *)draw + stride) : NULL); const bool offset_changes = next && next->vertexOffset != draw->vertexOffset; radv_emit_userdata_vertex(cmd_buffer, info, draw->vertexOffset); if (!state->render.view_mask) { - radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, can_eop && !offset_changes && i < drawCount - 1); + radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, + can_eop && !offset_changes && i < drawCount - 1); } else { - u_foreach_bit(view, state->render.view_mask) { + u_foreach_bit (view, state->render.view_mask) { radv_emit_view_index(cmd_buffer, view); radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false); @@ -8686,16 +8298,15 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, } ALWAYS_INLINE static void -radv_emit_direct_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, - uint32_t drawCount, const VkMultiDrawInfoEXT *minfo, - uint32_t use_opaque, uint32_t stride) +radv_emit_direct_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, uint32_t drawCount, + const VkMultiDrawInfoEXT *minfo, uint32_t use_opaque, uint32_t stride) { unsigned i = 0; const uint32_t view_mask = cmd_buffer->state.render.view_mask; const bool uses_drawid = cmd_buffer->state.uses_drawid; uint32_t last_start = 0; - vk_foreach_multi_draw(draw, i, minfo, drawCount, stride) { + vk_foreach_multi_draw (draw, i, minfo, drawCount, stride) { if (!i) radv_emit_userdata_vertex(cmd_buffer, info, draw->firstVertex); else @@ -8704,7 +8315,7 @@ radv_emit_direct_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct r if (!view_mask) { radv_cs_emit_draw_packet(cmd_buffer, draw->vertexCount, use_opaque); } else { - u_foreach_bit(view, view_mask) { + u_foreach_bit (view, view_mask) { radv_emit_view_index(cmd_buffer, view); radv_cs_emit_draw_packet(cmd_buffer, draw->vertexCount, use_opaque); } @@ -8712,17 +8323,16 @@ radv_emit_direct_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct r last_start = draw->firstVertex; } if (drawCount > 1) { - struct radv_cmd_state *state = &cmd_buffer->state; - assert(state->last_vertex_offset_valid); - state->last_vertex_offset = last_start; - if (uses_drawid) - state->last_drawid = drawCount - 1; + struct radv_cmd_state *state = &cmd_buffer->state; + assert(state->last_vertex_offset_valid); + state->last_vertex_offset = last_start; + if (uses_drawid) + state->last_drawid = drawCount - 1; } } ALWAYS_INLINE static void -radv_emit_direct_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, - uint32_t x, uint32_t y, uint32_t z) +radv_emit_direct_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z) { const uint32_t view_mask = cmd_buffer->state.render.view_mask; const uint32_t count = x * y * z; @@ -8732,7 +8342,7 @@ radv_emit_direct_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, if (!view_mask) { radv_cs_emit_draw_packet(cmd_buffer, count, 0); } else { - u_foreach_bit(view, view_mask) { + u_foreach_bit (view, view_mask) { radv_emit_view_index(cmd_buffer, view); radv_cs_emit_draw_packet(cmd_buffer, count, 0); } @@ -8740,18 +8350,15 @@ radv_emit_direct_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, } ALWAYS_INLINE static void -radv_emit_indirect_mesh_draw_packets(struct radv_cmd_buffer 
*cmd_buffer, - const struct radv_draw_info *info) +radv_emit_indirect_mesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info) { const struct radv_cmd_state *state = &cmd_buffer->state; struct radeon_winsys *ws = cmd_buffer->device->ws; struct radeon_cmdbuf *cs = cmd_buffer->cs; - const uint64_t va = - radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset; - const uint64_t count_va = !info->count_buffer - ? 0 - : radv_buffer_get_va(info->count_buffer->bo) + - info->count_buffer->offset + info->count_buffer_offset; + const uint64_t va = radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset; + const uint64_t count_va = !info->count_buffer ? 0 + : radv_buffer_get_va(info->count_buffer->bo) + + info->count_buffer->offset + info->count_buffer_offset; radv_cs_add_buffer(ws, cs, info->indirect->bo); @@ -8780,17 +8387,15 @@ radv_emit_indirect_mesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, } ALWAYS_INLINE static void -radv_emit_direct_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, - uint32_t z) +radv_emit_direct_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z) { const uint32_t view_mask = cmd_buffer->state.render.view_mask; const unsigned num_views = MAX2(1, util_bitcount(view_mask)); unsigned ace_predication_size = num_views * 6; /* DISPATCH_TASKMESH_DIRECT_ACE size */ radv_emit_userdata_task(cmd_buffer, x, y, z, 0); - radv_cs_emit_compute_predication(&cmd_buffer->state, cmd_buffer->gang.cs, - cmd_buffer->mec_inv_pred_va, &cmd_buffer->mec_inv_pred_emitted, - ace_predication_size); + radv_cs_emit_compute_predication(&cmd_buffer->state, cmd_buffer->gang.cs, cmd_buffer->mec_inv_pred_va, + &cmd_buffer->mec_inv_pred_emitted, ace_predication_size); if (!view_mask) { radv_cs_emit_dispatch_taskmesh_direct_ace_packet(cmd_buffer, x, y, z); @@ -8805,8 +8410,7 @@ radv_emit_direct_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, uint3 } static void -radv_emit_indirect_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, - const struct radv_draw_info *info) +radv_emit_indirect_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info) { const uint32_t view_mask = cmd_buffer->state.render.view_mask; struct radeon_winsys *ws = cmd_buffer->device->ws; @@ -8814,12 +8418,10 @@ radv_emit_indirect_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, unsigned ace_predication_size = num_views * 11; /* DISPATCH_TASKMESH_INDIRECT_MULTI_ACE size */ struct radeon_cmdbuf *ace_cs = cmd_buffer->gang.cs; - const uint64_t va = - radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset; - const uint64_t count_va = !info->count_buffer - ? 0 - : radv_buffer_get_va(info->count_buffer->bo) + - info->count_buffer->offset + info->count_buffer_offset; + const uint64_t va = radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset; + const uint64_t count_va = !info->count_buffer ? 
0 + : radv_buffer_get_va(info->count_buffer->bo) + + info->count_buffer->offset + info->count_buffer_offset; uint64_t workaround_cond_va = 0; if (num_views > 1) @@ -8850,8 +8452,8 @@ radv_emit_indirect_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, workaround_cond_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + workaround_cond_off; radeon_emit(ace_cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(ace_cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | - COPY_DATA_WR_CONFIRM); + radeon_emit(ace_cs, + COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM); radeon_emit(ace_cs, 1); radeon_emit(ace_cs, 0); radeon_emit(ace_cs, workaround_cond_va); @@ -8862,21 +8464,19 @@ radv_emit_indirect_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, } radv_cs_add_buffer(ws, cmd_buffer->gang.cs, info->indirect->bo); - radv_cs_emit_compute_predication(&cmd_buffer->state, cmd_buffer->gang.cs, - cmd_buffer->mec_inv_pred_va, &cmd_buffer->mec_inv_pred_emitted, - ace_predication_size); + radv_cs_emit_compute_predication(&cmd_buffer->state, cmd_buffer->gang.cs, cmd_buffer->mec_inv_pred_va, + &cmd_buffer->mec_inv_pred_emitted, ace_predication_size); if (workaround_cond_va) { radeon_emit(ace_cs, PKT3(PKT3_COND_EXEC, 3, 0)); radeon_emit(ace_cs, count_va); radeon_emit(ace_cs, count_va >> 32); radeon_emit(ace_cs, 0); - radeon_emit(ace_cs, - 6 + 11 * num_views); /* 1x COPY_DATA + Nx DISPATCH_TASKMESH_INDIRECT_MULTI_ACE */ + radeon_emit(ace_cs, 6 + 11 * num_views); /* 1x COPY_DATA + Nx DISPATCH_TASKMESH_INDIRECT_MULTI_ACE */ radeon_emit(ace_cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(ace_cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | - COPY_DATA_WR_CONFIRM); + radeon_emit(ace_cs, + COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM); radeon_emit(ace_cs, 0); radeon_emit(ace_cs, 0); radeon_emit(ace_cs, workaround_cond_va); @@ -8884,14 +8484,12 @@ radv_emit_indirect_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, } if (!view_mask) { - radv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(cmd_buffer, va, info->count, - count_va, info->stride); + radv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(cmd_buffer, va, info->count, count_va, info->stride); radv_cs_emit_dispatch_taskmesh_gfx_packet(cmd_buffer); } else { u_foreach_bit (view, view_mask) { radv_emit_view_index(cmd_buffer, view); - radv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(cmd_buffer, va, info->count, - count_va, info->stride); + radv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(cmd_buffer, va, info->count, count_va, info->stride); radv_cs_emit_dispatch_taskmesh_gfx_packet(cmd_buffer); } } @@ -8910,18 +8508,15 @@ radv_emit_indirect_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, } static void -radv_emit_indirect_draw_packets(struct radv_cmd_buffer *cmd_buffer, - const struct radv_draw_info *info) +radv_emit_indirect_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info) { const struct radv_cmd_state *state = &cmd_buffer->state; struct radeon_winsys *ws = cmd_buffer->device->ws; struct radeon_cmdbuf *cs = cmd_buffer->cs; - const uint64_t va = - radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset; - const uint64_t count_va = info->count_buffer - ? 
radv_buffer_get_va(info->count_buffer->bo) + - info->count_buffer->offset + info->count_buffer_offset - : 0; + const uint64_t va = radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset; + const uint64_t count_va = info->count_buffer ? radv_buffer_get_va(info->count_buffer->bo) + + info->count_buffer->offset + info->count_buffer_offset + : 0; radv_cs_add_buffer(ws, cs, info->indirect->bo); @@ -8935,15 +8530,12 @@ radv_emit_indirect_draw_packets(struct radv_cmd_buffer *cmd_buffer, } if (!state->render.view_mask) { - radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, count_va, - info->stride); + radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, count_va, info->stride); } else { - u_foreach_bit(i, state->render.view_mask) - { + u_foreach_bit (i, state->render.view_mask) { radv_emit_view_index(cmd_buffer, i); - radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, count_va, - info->stride); + radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, count_va, info->stride); } } } @@ -8965,20 +8557,18 @@ radv_emit_indirect_draw_packets(struct radv_cmd_buffer *cmd_buffer, * any context registers. */ static bool -radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer, - const struct radv_draw_info *info) +radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info) { if (cmd_buffer->state.context_roll_without_scissor_emitted || info->strmout_buffer) return true; - uint64_t used_states = - cmd_buffer->state.graphics_pipeline->needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL; + uint64_t used_states = cmd_buffer->state.graphics_pipeline->needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL; /* Index, vertex and streamout buffers don't change context regs. * We assume that any other dirty flag causes context rolls. */ - used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_VERTEX_BUFFER | - RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT | RADV_CMD_DIRTY_STREAMOUT_BUFFER); + used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_VERTEX_BUFFER | RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT | + RADV_CMD_DIRTY_STREAMOUT_BUFFER); return cmd_buffer->state.dirty & used_states; } @@ -8998,8 +8588,7 @@ radv_get_ngg_culling_settings(struct radv_cmd_buffer *cmd_buffer, bool vp_y_inve * because we don't know the primitive topology at compile time, so we should * disable it dynamically for points or lines. */ - const unsigned num_vertices_per_prim = - si_conv_prim_to_gs_out(d->vk.ia.primitive_topology, true) + 1; + const unsigned num_vertices_per_prim = si_conv_prim_to_gs_out(d->vk.ia.primitive_topology, true) + 1; if (num_vertices_per_prim != 3) return radv_nggc_none; @@ -9037,7 +8626,7 @@ radv_get_ngg_culling_settings(struct radv_cmd_buffer *cmd_buffer, bool vp_y_inve unsigned rasterization_samples = radv_get_rasterization_samples(cmd_buffer); unsigned subpixel_bits = 256; int32_t small_prim_precision_log2 = util_logbase2(rasterization_samples) - util_logbase2(subpixel_bits); - nggc_settings |= ((uint32_t) small_prim_precision_log2 << 24u); + nggc_settings |= ((uint32_t)small_prim_precision_log2 << 24u); } return nggc_settings; @@ -9068,20 +8657,18 @@ radv_emit_ngg_culling_state(struct radv_cmd_buffer *cmd_buffer) /* Correction for number of samples per pixel. 
*/ for (unsigned i = 0; i < 2; ++i) { - vp_scale[i] *= (float) cmd_buffer->state.dynamic.vk.ms.rasterization_samples; - vp_translate[i] *= (float) cmd_buffer->state.dynamic.vk.ms.rasterization_samples; + vp_scale[i] *= (float)cmd_buffer->state.dynamic.vk.ms.rasterization_samples; + vp_translate[i] *= (float)cmd_buffer->state.dynamic.vk.ms.rasterization_samples; } uint32_t vp_reg_values[4] = {fui(vp_scale[0]), fui(vp_scale[1]), fui(vp_translate[0]), fui(vp_translate[1])}; - const int8_t vp_sgpr_idx = - radv_get_user_sgpr(last_vgt_shader, AC_UD_NGG_VIEWPORT)->sgpr_idx; + const int8_t vp_sgpr_idx = radv_get_user_sgpr(last_vgt_shader, AC_UD_NGG_VIEWPORT)->sgpr_idx; assert(vp_sgpr_idx != -1); radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + vp_sgpr_idx * 4, 4); radeon_emit_array(cmd_buffer->cs, vp_reg_values, 4); } - const int8_t nggc_sgpr_idx = - radv_get_user_sgpr(last_vgt_shader, AC_UD_NGG_CULLING_SETTINGS)->sgpr_idx; + const int8_t nggc_sgpr_idx = radv_get_user_sgpr(last_vgt_shader, AC_UD_NGG_CULLING_SETTINGS)->sgpr_idx; assert(nggc_sgpr_idx != -1); radeon_set_sh_reg(cmd_buffer->cs, base_reg + nggc_sgpr_idx * 4, nggc_settings); @@ -9126,10 +8713,9 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r if (cmd_buffer->state.graphics_pipeline->ps_epilog) { ps_epilog = cmd_buffer->state.graphics_pipeline->ps_epilog; } else if ((cmd_buffer->state.emitted_graphics_pipeline != cmd_buffer->state.graphics_pipeline || - (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK | - RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_EQUATION)))) { + (cmd_buffer->state.dirty & + (RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK | RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE | + RADV_CMD_DIRTY_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE | RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_EQUATION)))) { ps_epilog = lookup_ps_epilog(cmd_buffer); if (!ps_epilog) { vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -9139,8 +8725,7 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r cmd_buffer->state.col_format_non_compacted = ps_epilog->spi_shader_col_format; bool need_null_export_workaround = radv_needs_null_export_workaround( - device, cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT], - cmd_buffer->state.custom_blend_mode); + device, cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT], cmd_buffer->state.custom_blend_mode); if (need_null_export_workaround && !cmd_buffer->state.col_format_non_compacted) cmd_buffer->state.col_format_non_compacted = V_028714_SPI_SHADER_32_R; @@ -9153,9 +8738,9 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r * 1. radv_need_late_scissor_emission * 2. any dirty dynamic flags that may cause context rolls */ - const bool late_scissor_emission = - cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug - ? radv_need_late_scissor_emission(cmd_buffer, info) : false; + const bool late_scissor_emission = cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug + ? 
radv_need_late_scissor_emission(cmd_buffer, info) + : false; if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_RBPLUS) radv_emit_rbplus_state(cmd_buffer); @@ -9167,17 +8752,16 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r radv_flush_occlusion_query_state(cmd_buffer); if ((cmd_buffer->state.dirty & - (RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | - RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE | RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_VIEWPORT | RADV_CMD_DIRTY_DYNAMIC_CONSERVATIVE_RAST_MODE | - RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES | RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY | - RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS_ENABLE)) && + (RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE | + RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT | + RADV_CMD_DIRTY_DYNAMIC_CONSERVATIVE_RAST_MODE | RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES | + RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS_ENABLE)) && cmd_buffer->state.has_nggc) radv_emit_ngg_culling_state(cmd_buffer); - if ((cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK | - RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES | - RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE)) || + if ((cmd_buffer->state.dirty & + (RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK | RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES | + RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE)) || cmd_buffer->state.emitted_graphics_pipeline != cmd_buffer->state.graphics_pipeline) radv_emit_binning_state(cmd_buffer); @@ -9201,8 +8785,8 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r if (dynamic_states) { radv_cmd_buffer_flush_dynamic_state(cmd_buffer, dynamic_states); - if (dynamic_states & (RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES | - RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE)) + if (dynamic_states & + (RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES | RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE)) radv_emit_fs_state(cmd_buffer); } @@ -9251,9 +8835,8 @@ radv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info /* Use optimal packet order based on whether we need to sync the * pipeline. */ - if (cmd_buffer->state.flush_bits & - (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB | - RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) { + if (cmd_buffer->state.flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB | + RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) { /* If we have to wait for idle, set all states first, so that * all SET packets are processed in parallel with previous draw * calls. 
Then upload descriptors, set shader pointers, and @@ -9291,8 +8874,7 @@ radv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info struct radv_cmd_state *state = &cmd_buffer->state; struct radeon_cmdbuf *cs = cmd_buffer->cs; assert(state->vtx_base_sgpr); - if (state->last_num_instances != info->instance_count || - cmd_buffer->device->uses_shadow_regs) { + if (state->last_num_instances != info->instance_count || cmd_buffer->device->uses_shadow_regs) { radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, false)); radeon_emit(cs, info->instance_count); state->last_num_instances = info->instance_count; @@ -9304,8 +8886,7 @@ radv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info } ALWAYS_INLINE static bool -radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, - uint32_t drawCount) +radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, uint32_t drawCount) { /* For direct draws, this makes sure we don't draw anything. * For indirect draws, this is necessary to prevent a GPU hang (on MEC version < 100). @@ -9319,16 +8900,16 @@ radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_ assert(!task_shader || ace_cs); - const VkShaderStageFlags stages = VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_FRAGMENT_BIT | (task_shader ? VK_SHADER_STAGE_TASK_BIT_EXT : 0); - const bool pipeline_is_dirty = - cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE && - cmd_buffer->state.graphics_pipeline != cmd_buffer->state.emitted_graphics_pipeline; + const VkShaderStageFlags stages = + VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_FRAGMENT_BIT | (task_shader ? VK_SHADER_STAGE_TASK_BIT_EXT : 0); + const bool pipeline_is_dirty = cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE && + cmd_buffer->state.graphics_pipeline != cmd_buffer->state.emitted_graphics_pipeline; const bool need_task_semaphore = task_shader && radv_flush_gang_leader_semaphore(cmd_buffer); ASSERTED const unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4096 + 128 * (drawCount - 1)); - ASSERTED const unsigned ace_cdw_max = !ace_cs ? 0 : - radeon_check_space(cmd_buffer->device->ws, ace_cs, 4096 + 128 * (drawCount - 1)); + ASSERTED const unsigned ace_cdw_max = + !ace_cs ? 0 : radeon_check_space(cmd_buffer->device->ws, ace_cs, 4096 + 128 * (drawCount - 1)); if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) radv_emit_fb_mip_change_flush(cmd_buffer); @@ -9351,8 +8932,7 @@ radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_ radv_flush_descriptors(cmd_buffer, stages, VK_PIPELINE_BIND_POINT_GRAPHICS); - const VkShaderStageFlags pc_stages = - radv_must_flush_constants(cmd_buffer, stages, VK_PIPELINE_BIND_POINT_GRAPHICS); + const VkShaderStageFlags pc_stages = radv_must_flush_constants(cmd_buffer, stages, VK_PIPELINE_BIND_POINT_GRAPHICS); if (pc_stages) radv_flush_constants(cmd_buffer, pc_stages, VK_PIPELINE_BIND_POINT_GRAPHICS); @@ -9392,8 +8972,7 @@ radv_after_draw(struct radv_cmd_buffer *cmd_buffer) * It must be done after drawing. 
*/ if (radv_is_streamout_enabled(cmd_buffer) && - (rad_info->family == CHIP_HAWAII || rad_info->family == CHIP_TONGA || - rad_info->family == CHIP_FIJI)) { + (rad_info->family == CHIP_HAWAII || rad_info->family == CHIP_TONGA || rad_info->family == CHIP_FIJI)) { cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_STREAMOUT_SYNC; } @@ -9401,8 +8980,8 @@ radv_after_draw(struct radv_cmd_buffer *cmd_buffer) } VKAPI_ATTR void VKAPI_CALL -radv_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, - uint32_t firstVertex, uint32_t firstInstance) +radv_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, + uint32_t firstInstance) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_draw_info info; @@ -9416,14 +8995,14 @@ radv_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t insta if (!radv_before_draw(cmd_buffer, &info, 1)) return; - const VkMultiDrawInfoEXT minfo = { firstVertex, vertexCount }; + const VkMultiDrawInfoEXT minfo = {firstVertex, vertexCount}; radv_emit_direct_draw_packets(cmd_buffer, &info, 1, &minfo, 0, 0); radv_after_draw(cmd_buffer); } VKAPI_ATTR void VKAPI_CALL radv_CmdDrawMultiEXT(VkCommandBuffer commandBuffer, uint32_t drawCount, const VkMultiDrawInfoEXT *pVertexInfo, - uint32_t instanceCount, uint32_t firstInstance, uint32_t stride) + uint32_t instanceCount, uint32_t firstInstance, uint32_t stride) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_draw_info info; @@ -9445,8 +9024,8 @@ radv_CmdDrawMultiEXT(VkCommandBuffer commandBuffer, uint32_t drawCount, const Vk } VKAPI_ATTR void VKAPI_CALL -radv_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, - uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance) +radv_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, + int32_t vertexOffset, uint32_t firstInstance) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_draw_info info; @@ -9460,14 +9039,15 @@ radv_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t if (!radv_before_draw(cmd_buffer, &info, 1)) return; - const VkMultiDrawIndexedInfoEXT minfo = { firstIndex, indexCount, vertexOffset }; + const VkMultiDrawIndexedInfoEXT minfo = {firstIndex, indexCount, vertexOffset}; radv_emit_draw_packets_indexed(cmd_buffer, &info, 1, &minfo, 0, NULL); radv_after_draw(cmd_buffer); } VKAPI_ATTR void VKAPI_CALL -radv_CmdDrawMultiIndexedEXT(VkCommandBuffer commandBuffer, uint32_t drawCount, const VkMultiDrawIndexedInfoEXT *pIndexInfo, - uint32_t instanceCount, uint32_t firstInstance, uint32_t stride, const int32_t *pVertexOffset) +radv_CmdDrawMultiIndexedEXT(VkCommandBuffer commandBuffer, uint32_t drawCount, + const VkMultiDrawIndexedInfoEXT *pIndexInfo, uint32_t instanceCount, uint32_t firstInstance, + uint32_t stride, const int32_t *pVertexOffset) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_draw_info info; @@ -9490,8 +9070,8 @@ radv_CmdDrawMultiIndexedEXT(VkCommandBuffer commandBuffer, uint32_t drawCount, c } VKAPI_ATTR void VKAPI_CALL -radv_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, - uint32_t drawCount, uint32_t stride) +radv_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, uint32_t drawCount, + uint32_t stride) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, 
commandBuffer); RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); @@ -9513,8 +9093,8 @@ radv_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSi } VKAPI_ATTR void VKAPI_CALL -radv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, - uint32_t drawCount, uint32_t stride) +radv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, uint32_t drawCount, + uint32_t stride) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); @@ -9536,9 +9116,8 @@ radv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkD } VKAPI_ATTR void VKAPI_CALL -radv_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, - VkBuffer _countBuffer, VkDeviceSize countBufferOffset, - uint32_t maxDrawCount, uint32_t stride) +radv_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, VkBuffer _countBuffer, + VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); @@ -9562,9 +9141,8 @@ radv_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDev } VKAPI_ATTR void VKAPI_CALL -radv_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, - VkDeviceSize offset, VkBuffer _countBuffer, - VkDeviceSize countBufferOffset, uint32_t maxDrawCount, +radv_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, + VkBuffer _countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); @@ -9616,8 +9194,8 @@ radv_CmdDrawMeshTasksEXT(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, } VKAPI_ATTR void VKAPI_CALL -radv_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer, - VkDeviceSize offset, uint32_t drawCount, uint32_t stride) +radv_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, + uint32_t drawCount, uint32_t stride) { if (!drawCount) return; @@ -9649,9 +9227,8 @@ radv_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer } VKAPI_ATTR void VKAPI_CALL -radv_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer, - VkDeviceSize offset, VkBuffer _countBuffer, - VkDeviceSize countBufferOffset, uint32_t maxDrawCount, +radv_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, + VkBuffer _countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride) { @@ -9688,8 +9265,7 @@ radv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPre const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo) { VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - VK_FROM_HANDLE(radv_indirect_command_layout, layout, - pGeneratedCommandsInfo->indirectCommandsLayout); + VK_FROM_HANDLE(radv_indirect_command_layout, layout, pGeneratedCommandsInfo->indirectCommandsLayout); VK_FROM_HANDLE(radv_buffer, prep_buffer, pGeneratedCommandsInfo->preprocessBuffer); /* The only actions that can be done are draws, so skip on other queues. 
*/ @@ -9719,8 +9295,7 @@ radv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPre return; uint32_t cmdbuf_size = radv_get_indirect_cmdbuf_size(pGeneratedCommandsInfo); - uint64_t va = radv_buffer_get_va(prep_buffer->bo) + prep_buffer->offset + - pGeneratedCommandsInfo->preprocessOffset; + uint64_t va = radv_buffer_get_va(prep_buffer->bo) + prep_buffer->offset + pGeneratedCommandsInfo->preprocessOffset; const uint32_t view_mask = cmd_buffer->state.render.view_mask; radeon_emit(cmd_buffer->cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating)); @@ -9765,8 +9340,7 @@ radv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPre } static void -radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, - const struct radv_shader *compute_shader, +radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *compute_shader, const struct radv_dispatch_info *info) { unsigned dispatch_initiator = cmd_buffer->device->dispatch_initiator; @@ -9817,8 +9391,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, if (radv_cmd_buffer_uses_mec(cmd_buffer)) { radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va, - &cmd_buffer->mec_inv_pred_emitted, - 4 /* DISPATCH_INDIRECT size */); + &cmd_buffer->mec_inv_pred_emitted, 4 /* DISPATCH_INDIRECT size */); radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) | PKT3_SHADER_TYPE_S(1)); radeon_emit(cs, info->va); radeon_emit(cs, info->va >> 32); @@ -9858,12 +9431,9 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, } radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); - radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[0]) | - S_00B81C_NUM_THREAD_PARTIAL(remainder[0])); - radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[1]) | - S_00B81C_NUM_THREAD_PARTIAL(remainder[1])); - radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[2]) | - S_00B81C_NUM_THREAD_PARTIAL(remainder[2])); + radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[0]) | S_00B81C_NUM_THREAD_PARTIAL(remainder[0])); + radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[1]) | S_00B81C_NUM_THREAD_PARTIAL(remainder[1])); + radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[2]) | S_00B81C_NUM_THREAD_PARTIAL(remainder[2])); dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1); } @@ -9902,8 +9472,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, if (radv_cmd_buffer_uses_mec(cmd_buffer)) { radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va, - &cmd_buffer->mec_inv_pred_emitted, - 5 /* DISPATCH_DIRECT size */); + &cmd_buffer->mec_inv_pred_emitted, 5 /* DISPATCH_DIRECT size */); predicating = false; } @@ -9918,13 +9487,11 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, } static void -radv_upload_compute_shader_descriptors(struct radv_cmd_buffer *cmd_buffer, - VkPipelineBindPoint bind_point) +radv_upload_compute_shader_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point) { radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT, bind_point); - const VkShaderStageFlags stages = bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR - ? RADV_RT_STAGE_BITS - : VK_SHADER_STAGE_COMPUTE_BIT; + const VkShaderStageFlags stages = + bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR ? 
RADV_RT_STAGE_BITS : VK_SHADER_STAGE_COMPUTE_BIT; const VkShaderStageFlags pc_stages = radv_must_flush_constants(cmd_buffer, stages, bind_point); if (pc_stages) radv_flush_constants(cmd_buffer, pc_stages, bind_point); @@ -9939,12 +9506,10 @@ radv_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_inf bool pipeline_is_dirty = pipeline != cmd_buffer->state.emitted_compute_pipeline; if (compute_shader->info.cs.regalloc_hang_bug) - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH | - RADV_CMD_FLAG_CS_PARTIAL_FLUSH; + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH; - if (cmd_buffer->state.flush_bits & - (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB | - RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) { + if (cmd_buffer->state.flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB | + RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) { /* If we have to wait for idle, set all states first, so that * all SET packets are processed in parallel with previous draw * calls. Then upload descriptors, set shader pointers, and @@ -10006,14 +9571,13 @@ radv_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_inf void radv_compute_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info) { - radv_dispatch(cmd_buffer, info, cmd_buffer->state.compute_pipeline, - cmd_buffer->state.shaders[MESA_SHADER_COMPUTE], + radv_dispatch(cmd_buffer, info, cmd_buffer->state.compute_pipeline, cmd_buffer->state.shaders[MESA_SHADER_COMPUTE], VK_PIPELINE_BIND_POINT_COMPUTE); } VKAPI_ATTR void VKAPI_CALL -radv_CmdDispatchBase(VkCommandBuffer commandBuffer, uint32_t base_x, uint32_t base_y, - uint32_t base_z, uint32_t x, uint32_t y, uint32_t z) +radv_CmdDispatchBase(VkCommandBuffer commandBuffer, uint32_t base_x, uint32_t base_y, uint32_t base_z, uint32_t x, + uint32_t y, uint32_t z) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_dispatch_info info = {0}; @@ -10072,8 +9636,8 @@ enum radv_rt_mode { }; static void -radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, const VkTraceRaysIndirectCommand2KHR *tables, - uint64_t indirect_va, enum radv_rt_mode mode) +radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, const VkTraceRaysIndirectCommand2KHR *tables, uint64_t indirect_va, + enum radv_rt_mode mode) { if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_RT) return; @@ -10087,10 +9651,8 @@ radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, const VkTraceRaysIndirectCom uint32_t wave_size = rt_prolog->info.wave_size; /* The hardware register is specified as a multiple of 64 or 256 DWORDS. */ - unsigned scratch_alloc_granule = - cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11 ? 256 : 1024; - scratch_bytes_per_wave += - align(cmd_buffer->state.rt_stack_size * wave_size, scratch_alloc_granule); + unsigned scratch_alloc_granule = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11 ? 256 : 1024; + scratch_bytes_per_wave += align(cmd_buffer->state.rt_stack_size * wave_size, scratch_alloc_granule); cmd_buffer->compute_scratch_size_per_wave_needed = MAX2(cmd_buffer->compute_scratch_size_per_wave_needed, scratch_bytes_per_wave); @@ -10102,9 +9664,8 @@ radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, const VkTraceRaysIndirectCom uint64_t sbt_va; if (mode != radv_rt_mode_indirect2) { - uint32_t upload_size = mode == radv_rt_mode_direct - ? 
sizeof(VkTraceRaysIndirectCommand2KHR) - : offsetof(VkTraceRaysIndirectCommand2KHR, width); + uint32_t upload_size = mode == radv_rt_mode_direct ? sizeof(VkTraceRaysIndirectCommand2KHR) + : offsetof(VkTraceRaysIndirectCommand2KHR, width); uint32_t offset; if (!radv_cmd_buffer_upload_data(cmd_buffer, upload_size, tables, &offset)) @@ -10112,9 +9673,8 @@ radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, const VkTraceRaysIndirectCom uint64_t upload_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + offset; - launch_size_va = (mode == radv_rt_mode_direct) - ? upload_va + offsetof(VkTraceRaysIndirectCommand2KHR, width) - : indirect_va; + launch_size_va = + (mode == radv_rt_mode_direct) ? upload_va + offsetof(VkTraceRaysIndirectCommand2KHR, width) : indirect_va; sbt_va = upload_va; } else { launch_size_va = indirect_va + offsetof(VkTraceRaysIndirectCommand2KHR, width); @@ -10130,35 +9690,29 @@ radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, const VkTraceRaysIndirectCom ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 15); - const struct radv_userdata_info *desc_loc = - radv_get_user_sgpr(rt_prolog, AC_UD_CS_SBT_DESCRIPTORS); + const struct radv_userdata_info *desc_loc = radv_get_user_sgpr(rt_prolog, AC_UD_CS_SBT_DESCRIPTORS); if (desc_loc->sgpr_idx != -1) { - radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, - base_reg + desc_loc->sgpr_idx * 4, sbt_va, true); + radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + desc_loc->sgpr_idx * 4, sbt_va, true); } - const struct radv_userdata_info *size_loc = - radv_get_user_sgpr(rt_prolog, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR); + const struct radv_userdata_info *size_loc = radv_get_user_sgpr(rt_prolog, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR); if (size_loc->sgpr_idx != -1) { - radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, - base_reg + size_loc->sgpr_idx * 4, launch_size_va, true); + radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + size_loc->sgpr_idx * 4, launch_size_va, + true); } - const struct radv_userdata_info *base_loc = - radv_get_user_sgpr(rt_prolog, AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE); + const struct radv_userdata_info *base_loc = radv_get_user_sgpr(rt_prolog, AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE); if (base_loc->sgpr_idx != -1) { const struct radv_shader_info *cs_info = &rt_prolog->info; radeon_set_sh_reg(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + base_loc->sgpr_idx * 4, pipeline->base.scratch_bytes_per_wave / cs_info->wave_size); } - const struct radv_userdata_info *shader_loc = - radv_get_user_sgpr(rt_prolog, AC_UD_CS_TRAVERSAL_SHADER_ADDR); + const struct radv_userdata_info *shader_loc = radv_get_user_sgpr(rt_prolog, AC_UD_CS_TRAVERSAL_SHADER_ADDR); if (shader_loc->sgpr_idx != -1) { - uint64_t traversal_va = - cmd_buffer->state.shaders[MESA_SHADER_INTERSECTION]->va | radv_rt_priority_traversal; - radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, - base_reg + shader_loc->sgpr_idx * 4, traversal_va, true); + uint64_t traversal_va = cmd_buffer->state.shaders[MESA_SHADER_INTERSECTION]->va | radv_rt_priority_traversal; + radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + shader_loc->sgpr_idx * 4, traversal_va, + true); } assert(cmd_buffer->cs->cdw <= cdw_max); @@ -10167,12 +9721,11 @@ radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, const VkTraceRaysIndirectCom } VKAPI_ATTR void VKAPI_CALL -radv_CmdTraceRaysKHR(VkCommandBuffer commandBuffer, - const VkStridedDeviceAddressRegionKHR 
*pRaygenShaderBindingTable, +radv_CmdTraceRaysKHR(VkCommandBuffer commandBuffer, const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable, const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable, const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable, - const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable, - uint32_t width, uint32_t height, uint32_t depth) + const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable, uint32_t width, + uint32_t height, uint32_t depth) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); @@ -10263,8 +9816,7 @@ radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *ima /* Transitioning from LAYOUT_UNDEFINED layout not everyone is consistent * in considering previous rendering work for WAW hazards. */ - state->flush_bits |= - radv_src_access_flush(cmd_buffer, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, image); + state->flush_bits |= radv_src_access_flush(cmd_buffer, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, image); if (image->planes[0].surface.has_stencil && !(range->aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) { @@ -10290,9 +9842,8 @@ radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *ima static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, - VkImageLayout src_layout, VkImageLayout dst_layout, - unsigned src_queue_mask, unsigned dst_queue_mask, - const VkImageSubresourceRange *range, + VkImageLayout src_layout, VkImageLayout dst_layout, unsigned src_queue_mask, + unsigned dst_queue_mask, const VkImageSubresourceRange *range, struct radv_sample_locations_state *sample_locs) { struct radv_device *device = cmd_buffer->device; @@ -10307,19 +9858,17 @@ radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer, struct ra radv_initialize_htile(cmd_buffer, image, range); } else if (radv_layout_is_htile_compressed(device, image, src_layout, src_queue_mask) && !radv_layout_is_htile_compressed(device, image, dst_layout, dst_queue_mask)) { - cmd_buffer->state.flush_bits |= - RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; radv_expand_depth_stencil(cmd_buffer, image, range, sample_locs); - cmd_buffer->state.flush_bits |= - RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; } } static uint32_t -radv_init_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, - const VkImageSubresourceRange *range, uint32_t value) +radv_init_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range, + uint32_t value) { struct radv_barrier_data barrier = {0}; @@ -10330,8 +9879,7 @@ radv_init_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, } uint32_t -radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, - const VkImageSubresourceRange *range) +radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range) { static const uint32_t fmask_clear_values[4] = {0x00000000, 0x02020202, 0xE4E4E4E4, 0x76543210}; uint32_t log2_samples = util_logbase2(image->vk.samples); @@ -10345,8 +9893,8 @@ radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, } uint32_t 
-radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, - const VkImageSubresourceRange *range, uint32_t value) +radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range, + uint32_t value) { struct radv_barrier_data barrier = {0}; uint32_t flush_bits = 0; @@ -10365,8 +9913,7 @@ radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, /* Compute the size of all fast clearable DCC levels. */ for (unsigned i = 0; i < image->planes[0].surface.num_meta_levels; i++) { struct legacy_surf_dcc_level *dcc_level = &image->planes[0].surface.u.legacy.color.dcc_level[i]; - unsigned dcc_fast_clear_size = - dcc_level->dcc_slice_fast_clear_size * image->vk.array_layers; + unsigned dcc_fast_clear_size = dcc_level->dcc_slice_fast_clear_size * image->vk.array_layers; if (!dcc_fast_clear_size) break; @@ -10377,8 +9924,7 @@ radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, /* Initialize the mipmap levels without DCC. */ if (size != image->planes[0].surface.meta_size) { flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, - radv_buffer_get_va(image->bindings[0].bo) + - image->bindings[0].offset + + radv_buffer_get_va(image->bindings[0].bo) + image->bindings[0].offset + image->planes[0].surface.meta_offset + size, image->planes[0].surface.meta_size - size, 0xffffffff); } @@ -10391,9 +9937,8 @@ radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, * Initialize DCC/FMASK/CMASK metadata for a color image. */ static void -radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, - VkImageLayout src_layout, VkImageLayout dst_layout, - unsigned src_queue_mask, unsigned dst_queue_mask, +radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout src_layout, + VkImageLayout dst_layout, unsigned src_queue_mask, unsigned dst_queue_mask, const VkImageSubresourceRange *range) { uint32_t flush_bits = 0; @@ -10401,8 +9946,7 @@ radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_i /* Transitioning from LAYOUT_UNDEFINED layout not everyone is * consistent in considering previous rendering work for WAW hazards. */ - cmd_buffer->state.flush_bits |= - radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image); + cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image); if (radv_image_has_cmask(image)) { uint32_t value; @@ -10411,8 +9955,7 @@ radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_i /* TODO: Fix clearing CMASK layers on GFX9. */ if (radv_image_is_tc_compat_cmask(image) || (radv_image_has_fmask(image) && - radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, dst_layout, - dst_queue_mask))) { + radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, dst_layout, dst_queue_mask))) { value = 0xccccccccu; } else { value = 0xffffffffu; @@ -10434,8 +9977,7 @@ radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_i if (radv_dcc_enabled(image, range->baseMipLevel)) { uint32_t value = 0xffffffffu; /* Fully expanded mode. 
*/ - if (radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel, - dst_layout, dst_queue_mask)) { + if (radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel, dst_layout, dst_queue_mask)) { value = 0u; } @@ -10453,16 +9995,15 @@ radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_i } static void -radv_retile_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, - VkImageLayout src_layout, VkImageLayout dst_layout, unsigned dst_queue_mask) +radv_retile_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout src_layout, + VkImageLayout dst_layout, unsigned dst_queue_mask) { /* If the image is read-only, we don't have to retile DCC because it can't change. */ if (!(image->vk.usage & RADV_IMAGE_USAGE_WRITE_BITS)) return; if (src_layout != VK_IMAGE_LAYOUT_PRESENT_SRC_KHR && - (dst_layout == VK_IMAGE_LAYOUT_PRESENT_SRC_KHR || - (dst_queue_mask & (1u << RADV_QUEUE_FOREIGN)))) + (dst_layout == VK_IMAGE_LAYOUT_PRESENT_SRC_KHR || (dst_queue_mask & (1u << RADV_QUEUE_FOREIGN)))) radv_retile_dcc(cmd_buffer, image); } @@ -10478,19 +10019,16 @@ radv_image_need_retile(const struct radv_image *image) */ static void radv_handle_color_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, - VkImageLayout src_layout, VkImageLayout dst_layout, - unsigned src_queue_mask, unsigned dst_queue_mask, - const VkImageSubresourceRange *range) + VkImageLayout src_layout, VkImageLayout dst_layout, unsigned src_queue_mask, + unsigned dst_queue_mask, const VkImageSubresourceRange *range) { bool dcc_decompressed = false, fast_clear_flushed = false; - if (!radv_image_has_cmask(image) && !radv_image_has_fmask(image) && - !radv_dcc_enabled(image, range->baseMipLevel)) + if (!radv_image_has_cmask(image) && !radv_image_has_fmask(image) && !radv_dcc_enabled(image, range->baseMipLevel)) return; if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) { - radv_init_color_image_metadata(cmd_buffer, image, src_layout, dst_layout, - src_queue_mask, dst_queue_mask, range); + radv_init_color_image_metadata(cmd_buffer, image, src_layout, dst_layout, src_queue_mask, dst_queue_mask, range); if (radv_image_need_retile(image)) radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask); @@ -10500,16 +10038,16 @@ radv_handle_color_image_transition(struct radv_cmd_buffer *cmd_buffer, struct ra if (radv_dcc_enabled(image, range->baseMipLevel)) { if (src_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) { cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, image, range, 0xffffffffu); - } else if (radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel, - src_layout, src_queue_mask) && - !radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel, - dst_layout, dst_queue_mask)) { + } else if (radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel, src_layout, + src_queue_mask) && + !radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel, dst_layout, + dst_queue_mask)) { radv_decompress_dcc(cmd_buffer, image, range); dcc_decompressed = true; - } else if (radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, - src_layout, src_queue_mask) && - !radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, - dst_layout, dst_queue_mask)) { + } else if (radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, src_layout, + src_queue_mask) && + 
!radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, dst_layout, + dst_queue_mask)) { radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); fast_clear_flushed = true; } @@ -10517,26 +10055,24 @@ radv_handle_color_image_transition(struct radv_cmd_buffer *cmd_buffer, struct ra if (radv_image_need_retile(image)) radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask); } else if (radv_image_has_cmask(image) || radv_image_has_fmask(image)) { - if (radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, - src_layout, src_queue_mask) && - !radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, - dst_layout, dst_queue_mask)) { + if (radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, src_layout, src_queue_mask) && + !radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, dst_layout, dst_queue_mask)) { radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); fast_clear_flushed = true; } } /* MSAA color decompress. */ - const enum radv_fmask_compression src_fmask_comp = radv_layout_fmask_compression(cmd_buffer->device, - image, src_layout, src_queue_mask); - const enum radv_fmask_compression dst_fmask_comp = radv_layout_fmask_compression(cmd_buffer->device, - image, dst_layout, dst_queue_mask); + const enum radv_fmask_compression src_fmask_comp = + radv_layout_fmask_compression(cmd_buffer->device, image, src_layout, src_queue_mask); + const enum radv_fmask_compression dst_fmask_comp = + radv_layout_fmask_compression(cmd_buffer->device, image, dst_layout, dst_queue_mask); if (src_fmask_comp <= dst_fmask_comp) return; if (src_fmask_comp == RADV_FMASK_COMPRESSION_FULL) { - if (radv_dcc_enabled(image, range->baseMipLevel) && - !radv_image_use_dcc_image_stores(cmd_buffer->device, image) && !dcc_decompressed) { + if (radv_dcc_enabled(image, range->baseMipLevel) && !radv_image_use_dcc_image_stores(cmd_buffer->device, image) && + !dcc_decompressed) { /* A DCC decompress is required before expanding FMASK * when DCC stores aren't supported to avoid being in * a state where DCC is compressed and the main @@ -10561,11 +10097,9 @@ radv_handle_color_image_transition(struct radv_cmd_buffer *cmd_buffer, struct ra } static void -radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, - VkImageLayout src_layout, VkImageLayout dst_layout, - uint32_t src_family_index, uint32_t dst_family_index, - const VkImageSubresourceRange *range, - struct radv_sample_locations_state *sample_locs) +radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout src_layout, + VkImageLayout dst_layout, uint32_t src_family_index, uint32_t dst_family_index, + const VkImageSubresourceRange *range, struct radv_sample_locations_state *sample_locs) { enum radv_queue_family src_qf = vk_queue_to_radv(cmd_buffer->device->physical_device, src_family_index); enum radv_queue_family dst_qf = vk_queue_to_radv(cmd_buffer->device->physical_device, dst_family_index); @@ -10574,8 +10108,7 @@ radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_ima * a corresponding release/acquire. Do the transition in the * most flexible queue. 
*/ - assert(src_qf == cmd_buffer->qf || - dst_qf == cmd_buffer->qf); + assert(src_qf == cmd_buffer->qf || dst_qf == cmd_buffer->qf); if (src_family_index == VK_QUEUE_FAMILY_EXTERNAL || src_family_index == VK_QUEUE_FAMILY_FOREIGN_EXT) return; @@ -10583,25 +10116,22 @@ radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_ima if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) return; - if (cmd_buffer->qf == RADV_QUEUE_COMPUTE && - (src_qf == RADV_QUEUE_GENERAL || dst_qf == RADV_QUEUE_GENERAL)) + if (cmd_buffer->qf == RADV_QUEUE_COMPUTE && (src_qf == RADV_QUEUE_GENERAL || dst_qf == RADV_QUEUE_GENERAL)) return; } - unsigned src_queue_mask = - radv_image_queue_family_mask(image, src_qf, cmd_buffer->qf); - unsigned dst_queue_mask = - radv_image_queue_family_mask(image, dst_qf, cmd_buffer->qf); + unsigned src_queue_mask = radv_image_queue_family_mask(image, src_qf, cmd_buffer->qf); + unsigned dst_queue_mask = radv_image_queue_family_mask(image, dst_qf, cmd_buffer->qf); if (src_layout == dst_layout && src_queue_mask == dst_queue_mask) return; if (image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { - radv_handle_depth_image_transition(cmd_buffer, image, src_layout, dst_layout, - src_queue_mask, dst_queue_mask, range, sample_locs); + radv_handle_depth_image_transition(cmd_buffer, image, src_layout, dst_layout, src_queue_mask, dst_queue_mask, + range, sample_locs); } else { - radv_handle_color_image_transition(cmd_buffer, image, src_layout, dst_layout, - src_queue_mask, dst_queue_mask, range); + radv_handle_color_image_transition(cmd_buffer, image, src_layout, dst_layout, src_queue_mask, dst_queue_mask, + range); } } @@ -10613,15 +10143,14 @@ radv_cp_dma_wait_for_stages(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageF * operation but it might also use a CP DMA copy in some rare situations. Other operations using * a CP DMA clear are implicitly synchronized (see CP_DMA_SYNC). 
*/ - if (stage_mask & (VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_CLEAR_BIT | - VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) + if (stage_mask & + (VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_CLEAR_BIT | VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | + VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) si_cp_dma_wait_for_idle(cmd_buffer); } static void -radv_barrier(struct radv_cmd_buffer *cmd_buffer, const VkDependencyInfo *dep_info, - enum rgp_barrier_reason reason) +radv_barrier(struct radv_cmd_buffer *cmd_buffer, const VkDependencyInfo *dep_info, enum rgp_barrier_reason reason) { enum radv_cmd_flush_bits src_flush_bits = 0; enum radv_cmd_flush_bits dst_flush_bits = 0; @@ -10635,31 +10164,25 @@ radv_barrier(struct radv_cmd_buffer *cmd_buffer, const VkDependencyInfo *dep_inf for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) { src_stage_mask |= dep_info->pMemoryBarriers[i].srcStageMask; - src_flush_bits |= - radv_src_access_flush(cmd_buffer, dep_info->pMemoryBarriers[i].srcAccessMask, NULL); + src_flush_bits |= radv_src_access_flush(cmd_buffer, dep_info->pMemoryBarriers[i].srcAccessMask, NULL); dst_stage_mask |= dep_info->pMemoryBarriers[i].dstStageMask; - dst_flush_bits |= - radv_dst_access_flush(cmd_buffer, dep_info->pMemoryBarriers[i].dstAccessMask, NULL); + dst_flush_bits |= radv_dst_access_flush(cmd_buffer, dep_info->pMemoryBarriers[i].dstAccessMask, NULL); } for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) { src_stage_mask |= dep_info->pBufferMemoryBarriers[i].srcStageMask; - src_flush_bits |= - radv_src_access_flush(cmd_buffer, dep_info->pBufferMemoryBarriers[i].srcAccessMask, NULL); + src_flush_bits |= radv_src_access_flush(cmd_buffer, dep_info->pBufferMemoryBarriers[i].srcAccessMask, NULL); dst_stage_mask |= dep_info->pBufferMemoryBarriers[i].dstStageMask; - dst_flush_bits |= - radv_dst_access_flush(cmd_buffer, dep_info->pBufferMemoryBarriers[i].dstAccessMask, NULL); + dst_flush_bits |= radv_dst_access_flush(cmd_buffer, dep_info->pBufferMemoryBarriers[i].dstAccessMask, NULL); } for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) { RADV_FROM_HANDLE(radv_image, image, dep_info->pImageMemoryBarriers[i].image); src_stage_mask |= dep_info->pImageMemoryBarriers[i].srcStageMask; - src_flush_bits |= - radv_src_access_flush(cmd_buffer, dep_info->pImageMemoryBarriers[i].srcAccessMask, image); + src_flush_bits |= radv_src_access_flush(cmd_buffer, dep_info->pImageMemoryBarriers[i].srcAccessMask, image); dst_stage_mask |= dep_info->pImageMemoryBarriers[i].dstStageMask; - dst_flush_bits |= - radv_dst_access_flush(cmd_buffer, dep_info->pImageMemoryBarriers[i].dstAccessMask, image); + dst_flush_bits |= radv_dst_access_flush(cmd_buffer, dep_info->pImageMemoryBarriers[i].dstAccessMask, image); } /* The Vulkan spec 1.1.98 says: @@ -10696,10 +10219,8 @@ radv_barrier(struct radv_cmd_buffer *cmd_buffer, const VkDependencyInfo *dep_inf } radv_handle_image_transition( - cmd_buffer, image, dep_info->pImageMemoryBarriers[i].oldLayout, - dep_info->pImageMemoryBarriers[i].newLayout, - dep_info->pImageMemoryBarriers[i].srcQueueFamilyIndex, - dep_info->pImageMemoryBarriers[i].dstQueueFamilyIndex, + cmd_buffer, image, dep_info->pImageMemoryBarriers[i].oldLayout, dep_info->pImageMemoryBarriers[i].newLayout, + dep_info->pImageMemoryBarriers[i].srcQueueFamilyIndex, dep_info->pImageMemoryBarriers[i].dstQueueFamilyIndex, 
&dep_info->pImageMemoryBarriers[i].subresourceRange, sample_locs_info ? &sample_locations : NULL); } @@ -10712,8 +10233,7 @@ radv_barrier(struct radv_cmd_buffer *cmd_buffer, const VkDependencyInfo *dep_inf } VKAPI_ATTR void VKAPI_CALL -radv_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, - const VkDependencyInfo *pDependencyInfo) +radv_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, const VkDependencyInfo *pDependencyInfo) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); @@ -10721,8 +10241,8 @@ radv_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, } static void -write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, - VkPipelineStageFlags2 stageMask, unsigned value) +write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, VkPipelineStageFlags2 stageMask, + unsigned value) { struct radeon_cmdbuf *cs = cmd_buffer->cs; uint64_t va = radv_buffer_get_va(event->bo); @@ -10733,9 +10253,7 @@ write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28); - if (stageMask & (VK_PIPELINE_STAGE_2_COPY_BIT | - VK_PIPELINE_STAGE_2_RESOLVE_BIT | - VK_PIPELINE_STAGE_2_BLIT_BIT | + if (stageMask & (VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_RESOLVE_BIT | VK_PIPELINE_STAGE_2_BLIT_BIT | VK_PIPELINE_STAGE_2_CLEAR_BIT)) { /* Be conservative for now. */ stageMask |= VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT; @@ -10751,13 +10269,11 @@ write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, /* Flags that only require signaling post PS. */ VkPipelineStageFlags2 post_ps_flags = post_index_fetch_flags | VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT | - VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT | - VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT | - VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT | - VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT | - VK_PIPELINE_STAGE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR | - VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT; + VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT | VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT | + VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT | + VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT | VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT | + VK_PIPELINE_STAGE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR | VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT; /* Flags that only require signaling post CS. 
*/ VkPipelineStageFlags2 post_cs_flags = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT; @@ -10793,17 +10309,15 @@ write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, } si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, - radv_cmd_buffer_uses_mec(cmd_buffer), event_type, 0, - EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, value, - cmd_buffer->gfx9_eop_bug_va); + radv_cmd_buffer_uses_mec(cmd_buffer), event_type, 0, EOP_DST_SEL_MEM, + EOP_DATA_SEL_VALUE_32BIT, va, value, cmd_buffer->gfx9_eop_bug_va); } assert(cmd_buffer->cs->cdw <= cdw_max); } VKAPI_ATTR void VKAPI_CALL -radv_CmdSetEvent2(VkCommandBuffer commandBuffer, VkEvent _event, - const VkDependencyInfo* pDependencyInfo) +radv_CmdSetEvent2(VkCommandBuffer commandBuffer, VkEvent _event, const VkDependencyInfo *pDependencyInfo) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_event, event, _event); @@ -10820,8 +10334,7 @@ radv_CmdSetEvent2(VkCommandBuffer commandBuffer, VkEvent _event, } VKAPI_ATTR void VKAPI_CALL -radv_CmdResetEvent2(VkCommandBuffer commandBuffer, VkEvent _event, - VkPipelineStageFlags2 stageMask) +radv_CmdResetEvent2(VkCommandBuffer commandBuffer, VkEvent _event, VkPipelineStageFlags2 stageMask) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_event, event, _event); @@ -10831,7 +10344,7 @@ radv_CmdResetEvent2(VkCommandBuffer commandBuffer, VkEvent _event, VKAPI_ATTR void VKAPI_CALL radv_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents, - const VkDependencyInfo* pDependencyInfos) + const VkDependencyInfo *pDependencyInfos) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radeon_cmdbuf *cs = cmd_buffer->cs; @@ -10853,9 +10366,8 @@ radv_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const Vk /* VK_EXT_conditional_rendering */ VKAPI_ATTR void VKAPI_CALL -radv_CmdBeginConditionalRenderingEXT( - VkCommandBuffer commandBuffer, - const VkConditionalRenderingBeginInfoEXT *pConditionalRenderingBegin) +radv_CmdBeginConditionalRenderingEXT(VkCommandBuffer commandBuffer, + const VkConditionalRenderingBeginInfoEXT *pConditionalRenderingBegin) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_buffer, buffer, pConditionalRenderingBegin->buffer); @@ -10877,8 +10389,7 @@ radv_CmdBeginConditionalRenderingEXT( si_emit_cache_flush(cmd_buffer); - if (cmd_buffer->qf == RADV_QUEUE_GENERAL && - !cmd_buffer->device->physical_device->rad_info.has_32bit_predication) { + if (cmd_buffer->qf == RADV_QUEUE_GENERAL && !cmd_buffer->device->physical_device->rad_info.has_32bit_predication) { uint64_t pred_value = 0, pred_va; unsigned pred_offset; @@ -10915,8 +10426,8 @@ radv_CmdBeginConditionalRenderingEXT( radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 8); radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | - COPY_DATA_WR_CONFIRM); + radeon_emit(cs, + COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM); radeon_emit(cs, va); radeon_emit(cs, va >> 32); radeon_emit(cs, pred_va); @@ -10962,9 +10473,9 @@ radv_CmdEndConditionalRenderingEXT(VkCommandBuffer commandBuffer) /* VK_EXT_transform_feedback */ VKAPI_ATTR void VKAPI_CALL -radv_CmdBindTransformFeedbackBuffersEXT(VkCommandBuffer commandBuffer, uint32_t firstBinding, - uint32_t bindingCount, const VkBuffer *pBuffers, - 
const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes) +radv_CmdBindTransformFeedbackBuffersEXT(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount, + const VkBuffer *pBuffers, const VkDeviceSize *pOffsets, + const VkDeviceSize *pSizes) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; @@ -11009,8 +10520,7 @@ radv_emit_streamout_enable(struct radv_cmd_buffer *cmd_buffer) radeon_set_context_reg_seq(cs, R_028B94_VGT_STRMOUT_CONFIG, 2); radeon_emit(cs, S_028B94_STREAMOUT_0_EN(streamout_enabled) | S_028B94_RAST_STREAM(0) | - S_028B94_STREAMOUT_1_EN(streamout_enabled) | - S_028B94_STREAMOUT_2_EN(streamout_enabled) | + S_028B94_STREAMOUT_1_EN(streamout_enabled) | S_028B94_STREAMOUT_2_EN(streamout_enabled) | S_028B94_STREAMOUT_3_EN(streamout_enabled)); radeon_emit(cs, so->hw_enabled_mask & enabled_stream_buffers_mask); @@ -11028,8 +10538,8 @@ radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable) so->streamout_enabled = enable; - so->hw_enabled_mask = so->enabled_mask | (so->enabled_mask << 4) | (so->enabled_mask << 8) | - (so->enabled_mask << 12); + so->hw_enabled_mask = + so->enabled_mask | (so->enabled_mask << 4) | (so->enabled_mask << 8) | (so->enabled_mask << 12); if (!cmd_buffer->device->physical_device->use_ngg_streamout && ((old_streamout_enabled != radv_is_streamout_enabled(cmd_buffer)) || @@ -11073,8 +10583,7 @@ radv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer) radeon_emit(cs, EVENT_TYPE(V_028A90_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0)); radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit(cs, - WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */ + radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */ radeon_emit(cs, reg_strmout_cntl >> 2); /* register */ radeon_emit(cs, 0); radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */ @@ -11109,17 +10618,14 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC radv_flush_vgt_streamout(cmd_buffer); } - ASSERTED unsigned cdw_max = - radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, MAX_SO_BUFFERS * 10); + ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, MAX_SO_BUFFERS * 10); - u_foreach_bit(i, so->enabled_mask) - { + u_foreach_bit (i, so->enabled_mask) { int32_t counter_buffer_idx = i - firstCounterBuffer; if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) counter_buffer_idx = -1; - bool append = - counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]; + bool append = counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]; uint64_t va = 0; if (append) { @@ -11137,8 +10643,8 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC if (cmd_buffer->device->physical_device->use_ngg_streamout) { radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0)); - radeon_emit(cs, S_411_SRC_SEL(append ? V_411_SRC_ADDR_TC_L2 : V_411_DATA) | - S_411_DST_SEL(V_411_GDS) | S_411_CP_SYNC(i == last_target)); + radeon_emit(cs, S_411_SRC_SEL(append ? V_411_SRC_ADDR_TC_L2 : V_411_DATA) | S_411_DST_SEL(V_411_GDS) | + S_411_CP_SYNC(i == last_target)); radeon_emit(cs, va); radeon_emit(cs, va >> 32); radeon_emit(cs, 4 * i); /* destination in GDS */ @@ -11150,28 +10656,28 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC * SGPRs what to do. 
*/ radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16 * i, 2); - radeon_emit(cs, sb[i].size >> 2); /* BUFFER_SIZE (in DW) */ - radeon_emit(cs, info->so.strides[i]); /* VTX_STRIDE (in DW) */ + radeon_emit(cs, sb[i].size >> 2); /* BUFFER_SIZE (in DW) */ + radeon_emit(cs, info->so.strides[i]); /* VTX_STRIDE (in DW) */ cmd_buffer->state.context_roll_without_scissor_emitted = true; if (append) { radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); - radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */ - STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */ - radeon_emit(cs, 0); /* unused */ - radeon_emit(cs, 0); /* unused */ - radeon_emit(cs, va); /* src address lo */ - radeon_emit(cs, va >> 32); /* src address hi */ + radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */ + STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, va); /* src address lo */ + radeon_emit(cs, va >> 32); /* src address hi */ } else { /* Start from the beginning. */ radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); - radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */ - STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */ - radeon_emit(cs, 0); /* unused */ - radeon_emit(cs, 0); /* unused */ - radeon_emit(cs, 0); /* unused */ - radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */ + STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, 0); /* unused */ } } } @@ -11182,9 +10688,8 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC } VKAPI_ATTR void VKAPI_CALL -radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer, - uint32_t counterBufferCount, const VkBuffer *pCounterBuffers, - const VkDeviceSize *pCounterBufferOffsets) +radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer, uint32_t counterBufferCount, + const VkBuffer *pCounterBuffers, const VkDeviceSize *pCounterBufferOffsets) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_streamout_state *so = &cmd_buffer->state.streamout; @@ -11195,17 +10700,14 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou if (!cmd_buffer->device->physical_device->use_ngg_streamout) radv_flush_vgt_streamout(cmd_buffer); - ASSERTED unsigned cdw_max = - radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, MAX_SO_BUFFERS * 12); + ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, MAX_SO_BUFFERS * 12); - u_foreach_bit(i, so->enabled_mask) - { + u_foreach_bit (i, so->enabled_mask) { int32_t counter_buffer_idx = i - firstCounterBuffer; if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) counter_buffer_idx = -1; - bool append = - counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]; + bool append = counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]; uint64_t va = 0; if (append) { @@ -11224,19 +10726,19 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou if (cmd_buffer->device->physical_device->use_ngg_streamout) { if (append) { 
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, - radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_PS_DONE, 0, - EOP_DST_SEL_TC_L2, EOP_DATA_SEL_GDS, va, EOP_DATA_GDS(i, 1), 0); + radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_PS_DONE, 0, EOP_DST_SEL_TC_L2, + EOP_DATA_SEL_GDS, va, EOP_DATA_GDS(i, 1), 0); } } else { if (append) { radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */ - STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) | - STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */ - radeon_emit(cs, va); /* dst address lo */ - radeon_emit(cs, va >> 32); /* dst address hi */ - radeon_emit(cs, 0); /* unused */ - radeon_emit(cs, 0); /* unused */ + STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) | + STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */ + radeon_emit(cs, va); /* dst address lo */ + radeon_emit(cs, va >> 32); /* dst address hi */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, 0); /* unused */ } /* Deactivate transform feedback by zeroing the buffer size. @@ -11280,8 +10782,7 @@ radv_emit_strmout_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_d radeon_emit(cs, 1); /* 1 DWORD */ } else { radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | - COPY_DATA_WR_CONFIRM); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | COPY_DATA_WR_CONFIRM); radeon_emit(cs, va); radeon_emit(cs, va >> 32); radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2); @@ -11292,9 +10793,8 @@ radv_emit_strmout_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_d } VKAPI_ATTR void VKAPI_CALL -radv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanceCount, - uint32_t firstInstance, VkBuffer _counterBuffer, - VkDeviceSize counterBufferOffset, uint32_t counterOffset, +radv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanceCount, uint32_t firstInstance, + VkBuffer _counterBuffer, VkDeviceSize counterBufferOffset, uint32_t counterOffset, uint32_t vertexStride) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); @@ -11312,7 +10812,7 @@ radv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanc if (!radv_before_draw(cmd_buffer, &info, 1)) return; - struct VkMultiDrawInfoEXT minfo = { 0, 0 }; + struct VkMultiDrawInfoEXT minfo = {0, 0}; radv_emit_strmout_buffer(cmd_buffer, &info); radv_emit_direct_draw_packets(cmd_buffer, &info, 1, &minfo, S_0287F0_USE_OPAQUE(1), 0); radv_after_draw(cmd_buffer); @@ -11320,8 +10820,8 @@ radv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanc /* VK_AMD_buffer_marker */ VKAPI_ATTR void VKAPI_CALL -radv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, - VkBuffer dstBuffer, VkDeviceSize dstOffset, uint32_t marker) +radv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, VkBuffer dstBuffer, + VkDeviceSize dstOffset, uint32_t marker) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_buffer, buffer, dstBuffer); @@ -11334,26 +10834,23 @@ radv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlag if (!(stage & ~VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)) { radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | 
COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | - COPY_DATA_WR_CONFIRM); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM); radeon_emit(cs, marker); radeon_emit(cs, 0); radeon_emit(cs, va); radeon_emit(cs, va >> 32); } else { si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, - radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, - 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, marker, - cmd_buffer->gfx9_eop_bug_va); + radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, + EOP_DATA_SEL_VALUE_32BIT, va, marker, cmd_buffer->gfx9_eop_bug_va); } assert(cmd_buffer->cs->cdw <= cdw_max); } VKAPI_ATTR void VKAPI_CALL -radv_CmdBindPipelineShaderGroupNV(VkCommandBuffer commandBuffer, - VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline, - uint32_t groupIndex) +radv_CmdBindPipelineShaderGroupNV(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, + VkPipeline pipeline, uint32_t groupIndex) { fprintf(stderr, "radv: unimplemented vkCmdBindPipelineShaderGroupNV\n"); abort(); @@ -11372,28 +10869,24 @@ radv_CmdBindDescriptorBuffersEXT(VkCommandBuffer commandBuffer, uint32_t bufferC } VKAPI_ATTR void VKAPI_CALL -radv_CmdSetDescriptorBufferOffsetsEXT(VkCommandBuffer commandBuffer, - VkPipelineBindPoint pipelineBindPoint, +radv_CmdSetDescriptorBufferOffsetsEXT(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout, uint32_t firstSet, uint32_t setCount, const uint32_t *pBufferIndices, const VkDeviceSize *pOffsets) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, pipelineBindPoint); + struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, pipelineBindPoint); for (unsigned i = 0; i < setCount; i++) { unsigned idx = i + firstSet; - descriptors_state->descriptor_buffers[idx] = - cmd_buffer->descriptor_buffers[pBufferIndices[i]] + pOffsets[i]; + descriptors_state->descriptor_buffers[idx] = cmd_buffer->descriptor_buffers[pBufferIndices[i]] + pOffsets[i]; radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, NULL, idx); } } VKAPI_ATTR void VKAPI_CALL -radv_CmdBindDescriptorBufferEmbeddedSamplersEXT(VkCommandBuffer commandBuffer, - VkPipelineBindPoint pipelineBindPoint, +radv_CmdBindDescriptorBufferEmbeddedSamplersEXT(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout, uint32_t set) { /* This is a no-op because embedded samplers are inlined at compile time. 
*/ diff --git a/src/amd/vulkan/radv_constants.h b/src/amd/vulkan/radv_constants.h index 6d8d12b..868e6c3 100644 --- a/src/amd/vulkan/radv_constants.h +++ b/src/amd/vulkan/radv_constants.h @@ -55,8 +55,8 @@ #define NUM_DEPTH_CLEAR_PIPELINES 2 #define NUM_DEPTH_DECOMPRESS_PIPELINES 3 -#define MAX_FRAMEBUFFER_WIDTH (1u << 14) -#define MAX_FRAMEBUFFER_HEIGHT (1u << 14) +#define MAX_FRAMEBUFFER_WIDTH (1u << 14) +#define MAX_FRAMEBUFFER_HEIGHT (1u << 14) /* * This is the point we switch from using CP to compute shader @@ -127,10 +127,9 @@ #define RADV_SHADER_ALLOC_MIN_ARENA_SIZE (256 * 1024) /* 256 KiB << 5 = 8 MiB */ #define RADV_SHADER_ALLOC_MAX_ARENA_SIZE_SHIFT 5u -#define RADV_SHADER_ALLOC_MIN_SIZE_CLASS 8 -#define RADV_SHADER_ALLOC_MAX_SIZE_CLASS 15 -#define RADV_SHADER_ALLOC_NUM_FREE_LISTS \ - (RADV_SHADER_ALLOC_MAX_SIZE_CLASS - RADV_SHADER_ALLOC_MIN_SIZE_CLASS + 1) +#define RADV_SHADER_ALLOC_MIN_SIZE_CLASS 8 +#define RADV_SHADER_ALLOC_MAX_SIZE_CLASS 15 +#define RADV_SHADER_ALLOC_NUM_FREE_LISTS (RADV_SHADER_ALLOC_MAX_SIZE_CLASS - RADV_SHADER_ALLOC_MIN_SIZE_CLASS + 1) #define PERF_CTR_MAX_PASSES 512 #define PERF_CTR_BO_PASS_OFFSET 16 @@ -148,7 +147,7 @@ * offset 20|24|28|32 - generated primitive counter for stream 0|1|2|3 * offset 36|40|44|48 - written primitive counter for stream 0|1|2|3 */ -#define RADV_NGG_QUERY_PIPELINE_STAT_OFFSET 16 +#define RADV_NGG_QUERY_PIPELINE_STAT_OFFSET 16 #define RADV_NGG_QUERY_PRIM_GEN_OFFSET(stream) (20 + stream * 4) #define RADV_NGG_QUERY_PRIM_XFB_OFFSET(stream) (36 + stream * 4) diff --git a/src/amd/vulkan/radv_cp_reg_shadowing.c b/src/amd/vulkan/radv_cp_reg_shadowing.c index 18d1bc7..9f0aae8 100644 --- a/src/amd/vulkan/radv_cp_reg_shadowing.c +++ b/src/amd/vulkan/radv_cp_reg_shadowing.c @@ -29,16 +29,14 @@ #include "sid.h" static void -radv_set_context_reg_array(struct radeon_cmdbuf *cs, unsigned reg, unsigned num, - const uint32_t *values) +radv_set_context_reg_array(struct radeon_cmdbuf *cs, unsigned reg, unsigned num, const uint32_t *values) { radeon_set_context_reg_seq(cs, reg, num); radeon_emit_array(cs, values, num); } VkResult -radv_create_shadow_regs_preamble(const struct radv_device *device, - struct radv_queue_state *queue_state) +radv_create_shadow_regs_preamble(const struct radv_device *device, struct radv_queue_state *queue_state) { struct radeon_winsys *ws = device->ws; const struct radeon_info *info = &device->physical_device->rad_info; @@ -50,14 +48,14 @@ radv_create_shadow_regs_preamble(const struct radv_device *device, /* allocate memory for queue_state->shadowed_regs where register states are saved */ result = ws->buffer_create(ws, SI_SHADOWED_REG_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM, - RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_NO_INTERPROCESS_SHARING, - RADV_BO_PRIORITY_SCRATCH, 0, &queue_state->shadowed_regs); + RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_NO_INTERPROCESS_SHARING, RADV_BO_PRIORITY_SCRATCH, 0, + &queue_state->shadowed_regs); if (result != VK_SUCCESS) goto fail; /* fill the cs for shadow regs preamble ib that starts the register shadowing */ - ac_create_shadowing_ib_preamble(info, (pm4_cmd_add_fn)&radeon_emit, cs, - queue_state->shadowed_regs->va, device->pbb_allowed); + ac_create_shadowing_ib_preamble(info, (pm4_cmd_add_fn)&radeon_emit, cs, queue_state->shadowed_regs->va, + device->pbb_allowed); while (cs->cdw & 7) { if (info->gfx_ib_pad_with_type2) @@ -66,10 +64,10 @@ radv_create_shadow_regs_preamble(const struct radv_device *device, radeon_emit(cs, PKT3_NOP_PAD); } - result = ws->buffer_create(ws, cs->cdw * 4, 4096, 
ws->cs_domain(ws), - RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC, - RADV_BO_PRIORITY_CS, 0, &queue_state->shadow_regs_ib); + result = ws->buffer_create( + ws, cs->cdw * 4, 4096, ws->cs_domain(ws), + RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC, + RADV_BO_PRIORITY_CS, 0, &queue_state->shadow_regs_ib); if (result != VK_SUCCESS) goto fail_ib_buffer; diff --git a/src/amd/vulkan/radv_cs.h b/src/amd/vulkan/radv_cs.h index 822e0b0..afa7157 100644 --- a/src/amd/vulkan/radv_cs.h +++ b/src/amd/vulkan/radv_cs.h @@ -103,8 +103,8 @@ radeon_set_sh_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value) } static inline void -radeon_set_sh_reg_idx(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs, - unsigned reg, unsigned idx, unsigned value) +radeon_set_sh_reg_idx(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs, unsigned reg, unsigned idx, + unsigned value) { assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END); assert(cs->cdw + 3 <= cs->reserved_dw); @@ -147,8 +147,8 @@ radeon_set_uconfig_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value) } static inline void -radeon_set_uconfig_reg_idx(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs, - unsigned reg, unsigned idx, unsigned value) +radeon_set_uconfig_reg_idx(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs, unsigned reg, + unsigned idx, unsigned value) { assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END); assert(cs->cdw + 3 <= cs->reserved_dw); @@ -176,8 +176,8 @@ radeon_set_perfctr_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, unsigne * that means that it can skip register writes due to not taking correctly into account the * fields from the GRBM_GFX_INDEX. With this bit we can force the write. 
*/ - bool filter_cam_workaround = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10 && - cmd_buffer->qf == RADV_QUEUE_GENERAL; + bool filter_cam_workaround = + cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10 && cmd_buffer->qf == RADV_QUEUE_GENERAL; radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0) | PKT3_RESET_FILTER_CAM(filter_cam_workaround)); radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2); diff --git a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c index 9872e3f..1e4f92c 100644 --- a/src/amd/vulkan/radv_debug.c +++ b/src/amd/vulkan/radv_debug.c @@ -71,8 +71,8 @@ radv_init_trace(struct radv_device *device) result = ws->buffer_create( ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM, - RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM | - RADEON_FLAG_VA_UNCACHED, RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->trace_bo); + RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_VA_UNCACHED, + RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->trace_bo); if (result != VK_SUCCESS) return false; @@ -114,8 +114,8 @@ radv_dump_mmapped_reg(const struct radv_device *device, FILE *f, unsigned offset uint32_t value; if (ws->read_registers(ws, offset, 1, &value)) - ac_dump_reg(f, device->physical_device->rad_info.gfx_level, - device->physical_device->rad_info.family, offset, value, ~0); + ac_dump_reg(f, device->physical_device->rad_info.gfx_level, device->physical_device->rad_info.family, offset, + value, ~0); } static void @@ -152,8 +152,7 @@ radv_dump_debug_registers(const struct radv_device *device, FILE *f) } static void -radv_dump_buffer_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, - const uint32_t *desc, FILE *f) +radv_dump_buffer_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, const uint32_t *desc, FILE *f) { fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n"); for (unsigned j = 0; j < 4; j++) @@ -161,11 +160,9 @@ radv_dump_buffer_descriptor(enum amd_gfx_level gfx_level, enum radeon_family fam } static void -radv_dump_image_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, - const uint32_t *desc, FILE *f) +radv_dump_image_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, const uint32_t *desc, FILE *f) { - unsigned sq_img_rsrc_word0 = - gfx_level >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0; + unsigned sq_img_rsrc_word0 = gfx_level >= GFX10 ? 
R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0; fprintf(f, COLOR_CYAN " Image:" COLOR_RESET "\n"); for (unsigned j = 0; j < 8; j++) @@ -177,8 +174,7 @@ radv_dump_image_descriptor(enum amd_gfx_level gfx_level, enum radeon_family fami } static void -radv_dump_sampler_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, - const uint32_t *desc, FILE *f) +radv_dump_sampler_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, const uint32_t *desc, FILE *f) { fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n"); for (unsigned j = 0; j < 4; j++) { @@ -187,17 +183,15 @@ radv_dump_sampler_descriptor(enum amd_gfx_level gfx_level, enum radeon_family fa } static void -radv_dump_combined_image_sampler_descriptor(enum amd_gfx_level gfx_level, - enum radeon_family family, const uint32_t *desc, - FILE *f) +radv_dump_combined_image_sampler_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, + const uint32_t *desc, FILE *f) { radv_dump_image_descriptor(gfx_level, family, desc, f); radv_dump_sampler_descriptor(gfx_level, family, desc + 16, f); } static void -radv_dump_descriptor_set(const struct radv_device *device, const struct radv_descriptor_set *set, unsigned id, - FILE *f) +radv_dump_descriptor_set(const struct radv_device *device, const struct radv_descriptor_set *set, unsigned id, FILE *f) { enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level; enum radeon_family family = device->physical_device->rad_info.family; @@ -267,8 +261,7 @@ struct radv_shader_inst { /* Split a disassembly string into lines and add them to the array pointed * to by "instructions". */ static void -si_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num, - struct radv_shader_inst *instructions) +si_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num, struct radv_shader_inst *instructions) { struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL; char *next; @@ -293,9 +286,8 @@ si_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num, /* More than 16 chars after ";" means the instruction is 8 bytes long. */ inst->size = next - semicolon > 16 ? 8 : 4; - snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len, - " [PC=0x%" PRIx64 ", off=%u, size=%u]", start_addr + inst->offset, inst->offset, - inst->size); + snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len, " [PC=0x%" PRIx64 ", off=%u, size=%u]", + start_addr + inst->offset, inst->offset, inst->size); last_inst = inst; (*num)++; @@ -304,8 +296,8 @@ si_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num, } static void -radv_dump_annotated_shader(const struct radv_shader *shader, gl_shader_stage stage, - struct ac_wave_info *waves, unsigned num_waves, FILE *f) +radv_dump_annotated_shader(const struct radv_shader *shader, gl_shader_stage stage, struct ac_wave_info *waves, + unsigned num_waves, FILE *f) { uint64_t start_addr, end_addr; unsigned i; @@ -333,13 +325,11 @@ radv_dump_annotated_shader(const struct radv_shader *shader, gl_shader_stage sta * Buffer size / 4 is the upper bound of the instruction count. 
*/ unsigned num_inst = 0; - struct radv_shader_inst *instructions = - calloc(shader->code_size / 4, sizeof(struct radv_shader_inst)); + struct radv_shader_inst *instructions = calloc(shader->code_size / 4, sizeof(struct radv_shader_inst)); si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions); - fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n", - radv_get_shader_name(&shader->info, stage)); + fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n", radv_get_shader_name(&shader->info, stage)); /* Print instructions with annotations. */ for (i = 0; i < num_inst; i++) { @@ -386,8 +376,8 @@ radv_dump_spirv(const struct radv_shader *shader, const char *sha1, const char * } static void -radv_dump_shader(struct radv_device *device, struct radv_pipeline *pipeline, - struct radv_shader *shader, gl_shader_stage stage, const char *dump_dir, FILE *f) +radv_dump_shader(struct radv_device *device, struct radv_pipeline *pipeline, struct radv_shader *shader, + gl_shader_stage stage, const char *dump_dir, FILE *f) { if (!shader) return; @@ -409,16 +399,14 @@ radv_dump_shader(struct radv_device *device, struct radv_pipeline *pipeline, fprintf(f, "NIR:\n%s\n", shader->nir_string); } - fprintf(f, "%s IR:\n%s\n", device->physical_device->use_llvm ? "LLVM" : "ACO", - shader->ir_string); + fprintf(f, "%s IR:\n%s\n", device->physical_device->use_llvm ? "LLVM" : "ACO", shader->ir_string); fprintf(f, "DISASM:\n%s\n", shader->disasm_string); radv_dump_shader_stats(device, pipeline, shader, stage, f); } static void -radv_dump_vertex_descriptors(const struct radv_device *device, - const struct radv_graphics_pipeline *pipeline, FILE *f) +radv_dump_vertex_descriptors(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, FILE *f) { struct radv_shader *vs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX); void *ptr = (uint64_t *)device->trace_id_ptr; @@ -428,8 +416,7 @@ radv_dump_vertex_descriptors(const struct radv_device *device, if (!count) return; - fprintf(f, "Num vertex %s: %d\n", - vs->info.vs.use_per_attribute_vb_descs ? "attributes" : "bindings", count); + fprintf(f, "Num vertex %s: %d\n", vs->info.vs.use_per_attribute_vb_descs ? 
"attributes" : "bindings", count); for (uint32_t i = 0; i < count; i++) { uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4]; uint64_t va = 0; @@ -452,8 +439,7 @@ radv_get_saved_vs_prolog(const struct radv_device *device) } static void -radv_dump_vs_prolog(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, - FILE *f) +radv_dump_vs_prolog(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, FILE *f) { struct radv_shader_part *vs_prolog = radv_get_saved_vs_prolog(device); struct radv_shader *vs_shader = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX); @@ -486,8 +472,7 @@ radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f) pipeline = radv_get_saved_pipeline(queue->device, ring); if (pipeline) { if (pipeline->type == RADV_PIPELINE_GRAPHICS) { - struct radv_graphics_pipeline *graphics_pipeline = - radv_pipeline_to_graphics(pipeline); + struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline); radv_dump_vs_prolog(device, graphics_pipeline, f); @@ -496,23 +481,21 @@ radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f) while (stages) { int stage = u_bit_scan(&stages); - radv_dump_shader(device, &graphics_pipeline->base, graphics_pipeline->base.shaders[stage], - stage, dump_dir, f); + radv_dump_shader(device, &graphics_pipeline->base, graphics_pipeline->base.shaders[stage], stage, dump_dir, + f); } } else if (pipeline->type == RADV_PIPELINE_RAY_TRACING) { struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline); for (unsigned i = 0; i < rt_pipeline->stage_count; i++) { if (radv_ray_tracing_stage_is_compiled(&rt_pipeline->stages[i])) { - struct radv_shader *shader = - container_of(rt_pipeline->stages[i].shader, struct radv_shader, base); + struct radv_shader *shader = container_of(rt_pipeline->stages[i].shader, struct radv_shader, base); radv_dump_shader(device, pipeline, shader, shader->info.stage, dump_dir, f); } } - radv_dump_shader(device, pipeline, pipeline->shaders[MESA_SHADER_INTERSECTION], - MESA_SHADER_INTERSECTION, dump_dir, f); + radv_dump_shader(device, pipeline, pipeline->shaders[MESA_SHADER_INTERSECTION], MESA_SHADER_INTERSECTION, + dump_dir, f); } else { - struct radv_compute_pipeline *compute_pipeline = - radv_pipeline_to_compute(pipeline); + struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline); radv_dump_shader(device, &compute_pipeline->base, compute_pipeline->base.shaders[MESA_SHADER_COMPUTE], MESA_SHADER_COMPUTE, dump_dir, f); @@ -526,34 +509,30 @@ radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f) fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves); if (pipeline->type == RADV_PIPELINE_GRAPHICS) { - struct radv_graphics_pipeline *graphics_pipeline = - radv_pipeline_to_graphics(pipeline); + struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline); /* Dump annotated active graphics shaders. 
*/ unsigned stages = graphics_pipeline->active_stages; while (stages) { int stage = u_bit_scan(&stages); - radv_dump_annotated_shader(graphics_pipeline->base.shaders[stage], stage, waves, - num_waves, f); + radv_dump_annotated_shader(graphics_pipeline->base.shaders[stage], stage, waves, num_waves, f); } } else if (pipeline->type == RADV_PIPELINE_RAY_TRACING) { struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline); for (unsigned i = 0; i < rt_pipeline->stage_count; i++) { if (radv_ray_tracing_stage_is_compiled(&rt_pipeline->stages[i])) { - struct radv_shader *shader = - container_of(rt_pipeline->stages[i].shader, struct radv_shader, base); + struct radv_shader *shader = container_of(rt_pipeline->stages[i].shader, struct radv_shader, base); radv_dump_annotated_shader(shader, shader->info.stage, waves, num_waves, f); } } - radv_dump_annotated_shader(pipeline->shaders[MESA_SHADER_INTERSECTION], - MESA_SHADER_INTERSECTION, waves, num_waves, f); + radv_dump_annotated_shader(pipeline->shaders[MESA_SHADER_INTERSECTION], MESA_SHADER_INTERSECTION, waves, + num_waves, f); } else { - struct radv_compute_pipeline *compute_pipeline = - radv_pipeline_to_compute(pipeline); + struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline); - radv_dump_annotated_shader(compute_pipeline->base.shaders[MESA_SHADER_COMPUTE], - MESA_SHADER_COMPUTE, waves, num_waves, f); + radv_dump_annotated_shader(compute_pipeline->base.shaders[MESA_SHADER_COMPUTE], MESA_SHADER_COMPUTE, waves, + num_waves, f); } /* Print waves executing shaders that are not currently bound. */ @@ -567,9 +546,7 @@ radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f) fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n"); found = true; } - fprintf(f, - " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016" PRIx64 " INST=%08X %08X PC=%" PRIx64 - "\n", + fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016" PRIx64 " INST=%08X %08X PC=%" PRIx64 "\n", waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec, waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc); } @@ -578,8 +555,7 @@ radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f) } if (pipeline->type == RADV_PIPELINE_GRAPHICS) { - struct radv_graphics_pipeline *graphics_pipeline = - radv_pipeline_to_graphics(pipeline); + struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline); radv_dump_vertex_descriptors(device, graphics_pipeline, f); } radv_dump_descriptors(queue->device, f); @@ -648,8 +624,7 @@ radv_dump_app_info(const struct radv_device *device, FILE *f) fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name); fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version); fprintf(f, "API version: %d.%d.%d\n", VK_VERSION_MAJOR(instance->vk.app_info.api_version), - VK_VERSION_MINOR(instance->vk.app_info.api_version), - VK_VERSION_PATCH(instance->vk.app_info.api_version)); + VK_VERSION_MINOR(instance->vk.app_info.api_version), VK_VERSION_PATCH(instance->vk.app_info.api_version)); radv_dump_enabled_options(device, f); } @@ -664,14 +639,14 @@ radv_dump_device_name(const struct radv_device *device, FILE *f) #endif #ifdef _WIN32 - fprintf(f, "Device name: %s (DRM %i.%i.%i)\n\n", device->physical_device->marketing_name, - info->drm_major, info->drm_minor, info->drm_patchlevel); + fprintf(f, "Device name: %s (DRM %i.%i.%i)\n\n", device->physical_device->marketing_name, info->drm_major, + info->drm_minor, 
info->drm_patchlevel); #else if (uname(&uname_data) == 0) snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release); - fprintf(f, "Device name: %s (DRM %i.%i.%i%s)\n\n", device->physical_device->marketing_name, - info->drm_major, info->drm_minor, info->drm_patchlevel, kernel_version); + fprintf(f, "Device name: %s (DRM %i.%i.%i%s)\n\n", device->physical_device->marketing_name, info->drm_major, + info->drm_minor, info->drm_patchlevel, kernel_version); #endif } @@ -686,8 +661,7 @@ radv_dump_umr_ring(const struct radv_queue *queue, FILE *f) if (ring != AMD_IP_GFX) return; - sprintf(cmd, "umr -RS %s 2>&1", - device->physical_device->rad_info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx"); + sprintf(cmd, "umr -RS %s 2>&1", device->physical_device->rad_info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx"); fprintf(f, "\nUMR GFX ring:\n\n"); radv_dump_cmd(cmd, f); @@ -734,8 +708,8 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs) bool hang_occurred = radv_gpu_hang_occurred(queue, ring); bool vm_fault_occurred = false; if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS) - vm_fault_occurred = ac_vm_fault_occurred(device->physical_device->rad_info.gfx_level, - &device->dmesg_timestamp, &addr); + vm_fault_occurred = + ac_vm_fault_occurred(device->physical_device->rad_info.gfx_level, &device->dmesg_timestamp, &addr); if (!hang_occurred && !vm_fault_occurred) return; @@ -754,8 +728,8 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs) timep = os_localtime(&raw_time, &result); strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep); - snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."), - getpid(), buf_time); + snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."), getpid(), + buf_time); if (mkdir(dump_dir, 0774) && errno != EEXIST) { fprintf(stderr, "radv: can't create directory '%s' (%i).\n", dump_dir, errno); abort(); @@ -905,10 +879,10 @@ radv_trap_handler_init(struct radv_device *device) if (result != VK_SUCCESS) return false; - result = ws->buffer_create(ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM, - RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT, - RADV_BO_PRIORITY_SCRATCH, 0, &device->tma_bo); + result = ws->buffer_create( + ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM, + RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT, + RADV_BO_PRIORITY_SCRATCH, 0, &device->tma_bo); if (result != VK_SUCCESS) return false; @@ -976,8 +950,7 @@ radv_dump_faulty_shader(struct radv_device *device, uint64_t faulty_pc) * Buffer size / 4 is the upper bound of the instruction count. */ unsigned num_inst = 0; - struct radv_shader_inst *instructions = - calloc(shader->code_size / 4, sizeof(struct radv_shader_inst)); + struct radv_shader_inst *instructions = calloc(shader->code_size / 4, sizeof(struct radv_shader_inst)); /* Split the disassembly string into instructions. 
*/ si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions); @@ -1014,23 +987,15 @@ radv_dump_sq_hw_regs(struct radv_device *device) fprintf(stderr, "\nHardware registers:\n"); if (device->physical_device->rad_info.gfx_level >= GFX10) { - ac_dump_reg(stderr, gfx_level, family, R_000408_SQ_WAVE_STATUS, - regs->status, ~0); - ac_dump_reg(stderr, gfx_level, family, R_00040C_SQ_WAVE_TRAPSTS, - regs->trap_sts, ~0); - ac_dump_reg(stderr, gfx_level, family, R_00045C_SQ_WAVE_HW_ID1, - regs->hw_id, ~0); - ac_dump_reg(stderr, gfx_level, family, R_00041C_SQ_WAVE_IB_STS, - regs->ib_sts, ~0); + ac_dump_reg(stderr, gfx_level, family, R_000408_SQ_WAVE_STATUS, regs->status, ~0); + ac_dump_reg(stderr, gfx_level, family, R_00040C_SQ_WAVE_TRAPSTS, regs->trap_sts, ~0); + ac_dump_reg(stderr, gfx_level, family, R_00045C_SQ_WAVE_HW_ID1, regs->hw_id, ~0); + ac_dump_reg(stderr, gfx_level, family, R_00041C_SQ_WAVE_IB_STS, regs->ib_sts, ~0); } else { - ac_dump_reg(stderr, gfx_level, family, R_000048_SQ_WAVE_STATUS, - regs->status, ~0); - ac_dump_reg(stderr, gfx_level, family, R_00004C_SQ_WAVE_TRAPSTS, - regs->trap_sts, ~0); - ac_dump_reg(stderr, gfx_level, family, R_000050_SQ_WAVE_HW_ID, - regs->hw_id, ~0); - ac_dump_reg(stderr, gfx_level, family, R_00005C_SQ_WAVE_IB_STS, - regs->ib_sts, ~0); + ac_dump_reg(stderr, gfx_level, family, R_000048_SQ_WAVE_STATUS, regs->status, ~0); + ac_dump_reg(stderr, gfx_level, family, R_00004C_SQ_WAVE_TRAPSTS, regs->trap_sts, ~0); + ac_dump_reg(stderr, gfx_level, family, R_000050_SQ_WAVE_HW_ID, regs->hw_id, ~0); + ac_dump_reg(stderr, gfx_level, family, R_00005C_SQ_WAVE_IB_STS, regs->ib_sts, ~0); } fprintf(stderr, "\n\n"); } @@ -1075,8 +1040,7 @@ radv_check_trap_handler(struct radv_queue *queue) uint8_t pc_rewind = (ttmp1 >> 25) & 0xf; uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4); - fprintf(stderr, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht, - pc_rewind); + fprintf(stderr, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht, pc_rewind); radv_dump_faulty_shader(device, pc); diff --git a/src/amd/vulkan/radv_descriptor_set.c b/src/amd/vulkan/radv_descriptor_set.c index 7363fba..f8531f1 100644 --- a/src/amd/vulkan/radv_descriptor_set.c +++ b/src/amd/vulkan/radv_descriptor_set.c @@ -38,18 +38,18 @@ static unsigned radv_descriptor_type_buffer_count(VkDescriptorType type) { switch (type) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: - case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: - return 0; - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - case VK_DESCRIPTOR_TYPE_MUTABLE_EXT: - return 3; - default: - return 1; + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: + case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: + return 0; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_MUTABLE_EXT: + return 3; + default: + return 1; } } @@ -59,8 +59,7 @@ has_equal_immutable_samplers(const VkSampler *samplers, uint32_t count) if (!samplers) return false; for (uint32_t i = 1; i < count; ++i) { - if (memcmp(radv_sampler_from_handle(samplers[0])->state, - radv_sampler_from_handle(samplers[i])->state, 16)) { + if 
(memcmp(radv_sampler_from_handle(samplers[0])->state, radv_sampler_from_handle(samplers[i])->state, 16)) { return false; } } @@ -91,8 +90,8 @@ radv_descriptor_alignment(VkDescriptorType type) } static bool -radv_mutable_descriptor_type_size_alignment(const VkMutableDescriptorTypeListVALVE *list, - uint64_t *out_size, uint64_t *out_align) +radv_mutable_descriptor_type_size_alignment(const VkMutableDescriptorTypeListVALVE *list, uint64_t *out_size, + uint64_t *out_align) { uint32_t max_size = 0; uint32_t max_align = 0; @@ -130,8 +129,7 @@ radv_mutable_descriptor_type_size_alignment(const VkMutableDescriptorTypeListVAL VKAPI_ATTR VkResult VKAPI_CALL radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkDescriptorSetLayout *pSetLayout) + const VkAllocationCallbacks *pAllocator, VkDescriptorSetLayout *pSetLayout) { RADV_FROM_HANDLE(radv_device, device, _device); struct radv_descriptor_set_layout *set_layout; @@ -154,8 +152,7 @@ radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCrea bool has_ycbcr_sampler = false; for (unsigned i = 0; i < pCreateInfo->pBindings[j].descriptorCount; ++i) { - if (radv_sampler_from_handle(pCreateInfo->pBindings[j].pImmutableSamplers[i]) - ->ycbcr_sampler) + if (radv_sampler_from_handle(pCreateInfo->pBindings[j].pImmutableSamplers[i])->ycbcr_sampler) has_ycbcr_sampler = true; } @@ -193,17 +190,14 @@ radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCrea ycbcr_sampler_offsets = samplers + 4 * immutable_sampler_count; set_layout->ycbcr_sampler_offsets_offset = (char *)ycbcr_sampler_offsets - (char *)set_layout; - uintptr_t first_ycbcr_sampler_offset = - (uintptr_t)ycbcr_sampler_offsets + sizeof(uint32_t) * num_bindings; - first_ycbcr_sampler_offset = - ALIGN(first_ycbcr_sampler_offset, alignof(struct vk_ycbcr_conversion_state)); + uintptr_t first_ycbcr_sampler_offset = (uintptr_t)ycbcr_sampler_offsets + sizeof(uint32_t) * num_bindings; + first_ycbcr_sampler_offset = ALIGN(first_ycbcr_sampler_offset, alignof(struct vk_ycbcr_conversion_state)); ycbcr_samplers = (struct vk_ycbcr_conversion_state *)first_ycbcr_sampler_offset; } else set_layout->ycbcr_sampler_offsets_offset = 0; VkDescriptorSetLayoutBinding *bindings = NULL; - VkResult result = - vk_create_sorted_bindings(pCreateInfo->pBindings, pCreateInfo->bindingCount, &bindings); + VkResult result = vk_create_sorted_bindings(pCreateInfo->pBindings, pCreateInfo->bindingCount, &bindings); if (result != VK_SUCCESS) { vk_descriptor_set_layout_unref(&device->vk, &set_layout->vk); return vk_error(device, result); @@ -220,14 +214,11 @@ radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCrea uint32_t first_alignment = 32; if (pCreateInfo->bindingCount > 0) { - uint32_t last_alignment = - radv_descriptor_alignment(bindings[pCreateInfo->bindingCount - 1].descriptorType); - if (bindings[pCreateInfo->bindingCount - 1].descriptorType == - VK_DESCRIPTOR_TYPE_MUTABLE_EXT) { + uint32_t last_alignment = radv_descriptor_alignment(bindings[pCreateInfo->bindingCount - 1].descriptorType); + if (bindings[pCreateInfo->bindingCount - 1].descriptorType == VK_DESCRIPTOR_TYPE_MUTABLE_EXT) { uint64_t mutable_size = 0, mutable_align = 0; radv_mutable_descriptor_type_size_alignment( - &mutable_info->pMutableDescriptorTypeLists[pCreateInfo->bindingCount - 1], - &mutable_size, &mutable_align); + &mutable_info->pMutableDescriptorTypeLists[pCreateInfo->bindingCount - 1], &mutable_size, 
&mutable_align); last_alignment = mutable_align; } @@ -246,8 +237,7 @@ radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCrea /* main image + fmask */ uint32_t max_sampled_image_descriptors = 2; - if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER && - binding->pImmutableSamplers) { + if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER && binding->pImmutableSamplers) { for (unsigned i = 0; i < binding->descriptorCount; ++i) { struct vk_ycbcr_conversion *conversion = radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler; @@ -255,8 +245,7 @@ radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCrea if (conversion) { has_ycbcr_sampler = true; max_sampled_image_descriptors = - MAX2(max_sampled_image_descriptors, - vk_format_get_plane_count(conversion->state.format)); + MAX2(max_sampled_image_descriptors, vk_format_get_plane_count(conversion->state.format)); } } } @@ -294,8 +283,8 @@ radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCrea break; case VK_DESCRIPTOR_TYPE_MUTABLE_EXT: { uint64_t mutable_size = 0, mutable_align = 0; - radv_mutable_descriptor_type_size_alignment( - &mutable_info->pMutableDescriptorTypeLists[j], &mutable_size, &mutable_align); + radv_mutable_descriptor_type_size_alignment(&mutable_info->pMutableDescriptorTypeLists[j], &mutable_size, + &mutable_align); assert(mutable_size && mutable_align); set_layout->binding[b].size = mutable_size; alignment = mutable_align; @@ -312,8 +301,7 @@ radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCrea break; } - if ((pass == 0 && alignment != first_alignment) || - (pass == 1 && alignment == first_alignment)) + if ((pass == 0 && alignment != first_alignment) || (pass == 1 && alignment == first_alignment)) continue; set_layout->size = align(set_layout->size, alignment); @@ -324,10 +312,8 @@ radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCrea set_layout->binding[b].dynamic_offset_offset = dynamic_offset_count; if (variable_flags && binding->binding < variable_flags->bindingCount && - (variable_flags->pBindingFlags[binding->binding] & - VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)) { - assert( - !binding->pImmutableSamplers); /* Terribly ill defined how many samplers are valid */ + (variable_flags->pBindingFlags[binding->binding] & VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)) { + assert(!binding->pImmutableSamplers); /* Terribly ill defined how many samplers are valid */ assert(binding->binding == num_bindings - 1); set_layout->has_variable_descriptors = true; @@ -342,16 +328,14 @@ radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCrea /* Do not optimize space for descriptor buffers and embedded samplers, otherwise the set * layout size/offset are incorrect. 
*/ - if (!(pCreateInfo->flags & - (VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT | - VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT))) { - set_layout->binding[b].immutable_samplers_equal = has_equal_immutable_samplers( - binding->pImmutableSamplers, binding->descriptorCount); + if (!(pCreateInfo->flags & (VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT | + VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT))) { + set_layout->binding[b].immutable_samplers_equal = + has_equal_immutable_samplers(binding->pImmutableSamplers, binding->descriptorCount); } for (uint32_t i = 0; i < binding->descriptorCount; i++) - memcpy(samplers + 4 * i, - &radv_sampler_from_handle(binding->pImmutableSamplers[i])->state, 16); + memcpy(samplers + 4 * i, &radv_sampler_from_handle(binding->pImmutableSamplers[i])->state, 16); /* Don't reserve space for the samplers if they're not accessed. */ if (set_layout->binding[b].immutable_samplers_equal) { @@ -368,8 +352,7 @@ radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCrea ycbcr_sampler_offsets[b] = (const char *)ycbcr_samplers - (const char *)set_layout; for (uint32_t i = 0; i < binding->descriptorCount; i++) { if (radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler) - ycbcr_samplers[i] = radv_sampler_from_handle(binding->pImmutableSamplers[i]) - ->ycbcr_sampler->state; + ycbcr_samplers[i] = radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler->state; else ycbcr_samplers[i].format = VK_FORMAT_UNDEFINED; } @@ -393,8 +376,7 @@ radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCrea * carefully constructed to not have pointers so a full hash instead of a per-field hash * should be ok. 
*/ - uint32_t hash_offset = - offsetof(struct radv_descriptor_set_layout, hash) + sizeof(set_layout->hash); + uint32_t hash_offset = offsetof(struct radv_descriptor_set_layout, hash) + sizeof(set_layout->hash); _mesa_sha1_compute((const char *)set_layout + hash_offset, size - hash_offset, set_layout->hash); *pSetLayout = radv_descriptor_set_layout_to_handle(set_layout); @@ -403,13 +385,11 @@ radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCrea } VKAPI_ATTR void VKAPI_CALL -radv_GetDescriptorSetLayoutSupport(VkDevice device, - const VkDescriptorSetLayoutCreateInfo *pCreateInfo, +radv_GetDescriptorSetLayoutSupport(VkDevice device, const VkDescriptorSetLayoutCreateInfo *pCreateInfo, VkDescriptorSetLayoutSupport *pSupport) { VkDescriptorSetLayoutBinding *bindings = NULL; - VkResult result = - vk_create_sorted_bindings(pCreateInfo->pBindings, pCreateInfo->bindingCount, &bindings); + VkResult result = vk_create_sorted_bindings(pCreateInfo->pBindings, pCreateInfo->bindingCount, &bindings); if (result != VK_SUCCESS) { pSupport->supported = false; return; @@ -417,8 +397,8 @@ radv_GetDescriptorSetLayoutSupport(VkDevice device, const VkDescriptorSetLayoutBindingFlagsCreateInfo *variable_flags = vk_find_struct_const(pCreateInfo->pNext, DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO); - VkDescriptorSetVariableDescriptorCountLayoutSupport *variable_count = vk_find_struct( - pSupport->pNext, DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT); + VkDescriptorSetVariableDescriptorCountLayoutSupport *variable_count = + vk_find_struct(pSupport->pNext, DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT); const VkMutableDescriptorTypeCreateInfoEXT *mutable_info = vk_find_struct_const(pCreateInfo->pNext, MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT); if (variable_count) { @@ -427,14 +407,11 @@ radv_GetDescriptorSetLayoutSupport(VkDevice device, uint32_t first_alignment = 32; if (pCreateInfo->bindingCount > 0) { - uint32_t last_alignment = - radv_descriptor_alignment(bindings[pCreateInfo->bindingCount - 1].descriptorType); - if (bindings[pCreateInfo->bindingCount - 1].descriptorType == - VK_DESCRIPTOR_TYPE_MUTABLE_EXT) { + uint32_t last_alignment = radv_descriptor_alignment(bindings[pCreateInfo->bindingCount - 1].descriptorType); + if (bindings[pCreateInfo->bindingCount - 1].descriptorType == VK_DESCRIPTOR_TYPE_MUTABLE_EXT) { uint64_t mutable_size = 0, mutable_align = 0; radv_mutable_descriptor_type_size_alignment( - &mutable_info->pMutableDescriptorTypeLists[pCreateInfo->bindingCount - 1], - &mutable_size, &mutable_align); + &mutable_info->pMutableDescriptorTypeLists[pCreateInfo->bindingCount - 1], &mutable_size, &mutable_align); last_alignment = mutable_align; } @@ -484,9 +461,8 @@ radv_GetDescriptorSetLayoutSupport(VkDevice device, descriptor_count = 1; break; case VK_DESCRIPTOR_TYPE_MUTABLE_EXT: - if (!radv_mutable_descriptor_type_size_alignment( - &mutable_info->pMutableDescriptorTypeLists[i], &descriptor_size, - &descriptor_alignment)) { + if (!radv_mutable_descriptor_type_size_alignment(&mutable_info->pMutableDescriptorTypeLists[i], + &descriptor_size, &descriptor_alignment)) { supported = false; } break; @@ -516,8 +492,7 @@ radv_GetDescriptorSetLayoutSupport(VkDevice device, supported = false; } if (variable_flags && binding->binding < variable_flags->bindingCount && variable_count && - (variable_flags->pBindingFlags[binding->binding] & - VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)) { + (variable_flags->pBindingFlags[binding->binding] & 
VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)) { variable_count->maxVariableDescriptorCount = MIN2(UINT32_MAX, max_count); } size += descriptor_count * descriptor_size; @@ -534,8 +509,7 @@ radv_GetDescriptorSetLayoutSupport(VkDevice device, * just multiple descriptor set layouts pasted together. */ void -radv_pipeline_layout_init(struct radv_device *device, struct radv_pipeline_layout *layout, - bool independent_sets) +radv_pipeline_layout_init(struct radv_device *device, struct radv_pipeline_layout *layout, bool independent_sets) { memset(layout, 0, sizeof(*layout)); @@ -595,21 +569,18 @@ radv_pipeline_layout_finish(struct radv_device *device, struct radv_pipeline_lay VKAPI_ATTR VkResult VKAPI_CALL radv_CreatePipelineLayout(VkDevice _device, const VkPipelineLayoutCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkPipelineLayout *pPipelineLayout) + const VkAllocationCallbacks *pAllocator, VkPipelineLayout *pPipelineLayout) { RADV_FROM_HANDLE(radv_device, device, _device); struct radv_pipeline_layout *layout; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); - layout = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*layout), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + layout = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*layout), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (layout == NULL) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - radv_pipeline_layout_init(device, layout, - pCreateInfo->flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT); + radv_pipeline_layout_init(device, layout, pCreateInfo->flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT); layout->num_sets = pCreateInfo->setLayoutCount; @@ -641,8 +612,7 @@ radv_CreatePipelineLayout(VkDevice _device, const VkPipelineLayoutCreateInfo *pC } VKAPI_ATTR void VKAPI_CALL -radv_DestroyPipelineLayout(VkDevice _device, VkPipelineLayout _pipelineLayout, - const VkAllocationCallbacks *pAllocator) +radv_DestroyPipelineLayout(VkDevice _device, VkPipelineLayout _pipelineLayout, const VkAllocationCallbacks *pAllocator) { RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, _pipelineLayout); @@ -666,16 +636,12 @@ radv_descriptor_set_create(struct radv_device *device, struct radv_descriptor_po struct radv_descriptor_set *set; uint32_t buffer_count = layout->buffer_count; if (variable_count) { - unsigned stride = - radv_descriptor_type_buffer_count(layout->binding[layout->binding_count - 1].type); - buffer_count = - layout->binding[layout->binding_count - 1].buffer_offset + *variable_count * stride; + unsigned stride = radv_descriptor_type_buffer_count(layout->binding[layout->binding_count - 1].type); + buffer_count = layout->binding[layout->binding_count - 1].buffer_offset + *variable_count * stride; } - unsigned range_offset = - sizeof(struct radv_descriptor_set_header) + sizeof(struct radeon_winsys_bo *) * buffer_count; + unsigned range_offset = sizeof(struct radv_descriptor_set_header) + sizeof(struct radeon_winsys_bo *) * buffer_count; const unsigned dynamic_offset_count = layout->dynamic_offset_count; - unsigned mem_size = - range_offset + sizeof(struct radv_descriptor_range) * dynamic_offset_count; + unsigned mem_size = range_offset + sizeof(struct radv_descriptor_range) * dynamic_offset_count; if (pool->host_memory_base) { if (pool->host_memory_end - pool->host_memory_ptr < mem_size) @@ -695,8 +661,7 @@ radv_descriptor_set_create(struct radv_device *device, struct radv_descriptor_po 
vk_object_base_init(&device->vk, &set->header.base, VK_OBJECT_TYPE_DESCRIPTOR_SET); if (dynamic_offset_count) { - set->header.dynamic_descriptors = - (struct radv_descriptor_range *)((uint8_t *)set + range_offset); + set->header.dynamic_descriptors = (struct radv_descriptor_range *)((uint8_t *)set + range_offset); } set->header.layout = layout; @@ -704,8 +669,7 @@ radv_descriptor_set_create(struct radv_device *device, struct radv_descriptor_po uint32_t layout_size = layout->size; if (variable_count) { uint32_t stride = layout->binding[layout->binding_count - 1].size; - if (layout->binding[layout->binding_count - 1].type == - VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) + if (layout->binding[layout->binding_count - 1].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) stride = 1; layout_size = layout->binding[layout->binding_count - 1].offset + *variable_count * stride; @@ -747,8 +711,7 @@ radv_descriptor_set_create(struct radv_device *device, struct radv_descriptor_po set->header.bo = pool->bo; set->header.mapped_ptr = (uint32_t *)(pool->mapped_ptr + offset); set->header.va = pool->bo ? (radv_buffer_get_va(set->header.bo) + offset) : 0; - memmove(&pool->entries[index + 1], &pool->entries[index], - sizeof(pool->entries[0]) * (pool->entry_count - index)); + memmove(&pool->entries[index + 1], &pool->entries[index], sizeof(pool->entries[0]) * (pool->entry_count - index)); pool->entries[index].offset = offset; pool->entries[index].size = layout_size; pool->entries[index].set = set; @@ -757,8 +720,7 @@ radv_descriptor_set_create(struct radv_device *device, struct radv_descriptor_po if (layout->has_immutable_samplers) { for (unsigned i = 0; i < layout->binding_count; ++i) { - if (!layout->binding[i].immutable_samplers_offset || - layout->binding[i].immutable_samplers_equal) + if (!layout->binding[i].immutable_samplers_offset || layout->binding[i].immutable_samplers_equal) continue; unsigned offset = layout->binding[i].offset / 4; @@ -791,8 +753,7 @@ radv_descriptor_set_destroy(struct radv_device *device, struct radv_descriptor_p if (free_bo && !pool->host_memory_base) { for (int i = 0; i < pool->entry_count; ++i) { if (pool->entries[i].set == set) { - memmove(&pool->entries[i], &pool->entries[i + 1], - sizeof(pool->entries[i]) * (pool->entry_count - i - 1)); + memmove(&pool->entries[i], &pool->entries[i + 1], sizeof(pool->entries[i]) * (pool->entry_count - i - 1)); --pool->entry_count; break; } @@ -830,10 +791,9 @@ radv_destroy_descriptor_pool(struct radv_device *device, const VkAllocationCallb } VkResult -radv_create_descriptor_pool(struct radv_device *device, - const VkDescriptorPoolCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkDescriptorPool *pDescriptorPool, bool is_internal) +radv_create_descriptor_pool(struct radv_device *device, const VkDescriptorPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkDescriptorPool *pDescriptorPool, + bool is_internal) { struct radv_descriptor_pool *pool; uint64_t size = sizeof(struct radv_descriptor_pool); @@ -842,8 +802,7 @@ radv_create_descriptor_pool(struct radv_device *device, const VkMutableDescriptorTypeCreateInfoEXT *mutable_info = vk_find_struct_const(pCreateInfo->pNext, MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT); - vk_foreach_struct_const(ext, pCreateInfo->pNext) - { + vk_foreach_struct_const (ext, pCreateInfo->pNext) { switch (ext->sType) { case VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO: { const VkDescriptorPoolInlineUniformBlockCreateInfo *info = @@ -862,7 +821,7 @@ 
radv_create_descriptor_pool(struct radv_device *device, uint64_t num_16byte_descriptors = 0; for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) { bo_count += radv_descriptor_type_buffer_count(pCreateInfo->pPoolSizes[i].type) * - pCreateInfo->pPoolSizes[i].descriptorCount; + pCreateInfo->pPoolSizes[i].descriptorCount; switch (pCreateInfo->pPoolSizes[i].type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: @@ -892,9 +851,8 @@ radv_create_descriptor_pool(struct radv_device *device, * we must allocate enough for any supported mutable descriptor type, i.e. 64 bytes. */ if (mutable_info && i < mutable_info->mutableDescriptorTypeListCount) { uint64_t mutable_size, mutable_alignment; - if (radv_mutable_descriptor_type_size_alignment( - &mutable_info->pMutableDescriptorTypeLists[i], &mutable_size, - &mutable_alignment)) { + if (radv_mutable_descriptor_type_size_alignment(&mutable_info->pMutableDescriptorTypeLists[i], + &mutable_size, &mutable_alignment)) { /* 32 as we may need to align for images */ mutable_size = align(mutable_size, 32); bo_size += mutable_size * pCreateInfo->pPoolSizes[i].descriptorCount; @@ -951,15 +909,13 @@ radv_create_descriptor_pool(struct radv_device *device, if (bo_size) { if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_VALVE)) { - enum radeon_bo_flag flags = RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | - RADEON_FLAG_32BIT; + enum radeon_bo_flag flags = RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT; if (device->instance->zero_vram) flags |= RADEON_FLAG_ZERO_VRAM; - VkResult result = device->ws->buffer_create( - device->ws, bo_size, 32, RADEON_DOMAIN_VRAM, flags, RADV_BO_PRIORITY_DESCRIPTOR, 0, - &pool->bo); + VkResult result = device->ws->buffer_create(device->ws, bo_size, 32, RADEON_DOMAIN_VRAM, flags, + RADV_BO_PRIORITY_DESCRIPTOR, 0, &pool->bo); if (result != VK_SUCCESS) { radv_destroy_descriptor_pool(device, pAllocator, pool); return vk_error(device, result); @@ -970,8 +926,7 @@ radv_create_descriptor_pool(struct radv_device *device, return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); } } else { - pool->host_bo = - vk_alloc2(&device->vk.alloc, pAllocator, bo_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + pool->host_bo = vk_alloc2(&device->vk.alloc, pAllocator, bo_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!pool->host_bo) { radv_destroy_descriptor_pool(device, pAllocator, pool); return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -989,16 +944,14 @@ radv_create_descriptor_pool(struct radv_device *device, VKAPI_ATTR VkResult VKAPI_CALL radv_CreateDescriptorPool(VkDevice _device, const VkDescriptorPoolCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkDescriptorPool *pDescriptorPool) + const VkAllocationCallbacks *pAllocator, VkDescriptorPool *pDescriptorPool) { RADV_FROM_HANDLE(radv_device, device, _device); return radv_create_descriptor_pool(device, pCreateInfo, pAllocator, pDescriptorPool, false); } VKAPI_ATTR void VKAPI_CALL -radv_DestroyDescriptorPool(VkDevice _device, VkDescriptorPool _pool, - const VkAllocationCallbacks *pAllocator) +radv_DestroyDescriptorPool(VkDevice _device, VkDescriptorPool _pool, const VkAllocationCallbacks *pAllocator) { RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_descriptor_pool, pool, _pool); @@ -1010,8 +963,7 @@ radv_DestroyDescriptorPool(VkDevice _device, VkDescriptorPool _pool, } VKAPI_ATTR VkResult VKAPI_CALL -radv_ResetDescriptorPool(VkDevice _device, VkDescriptorPool descriptorPool, - 
VkDescriptorPoolResetFlags flags) +radv_ResetDescriptorPool(VkDevice _device, VkDescriptorPool descriptorPool, VkDescriptorPoolResetFlags flags) { RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_descriptor_pool, pool, descriptorPool); @@ -1045,8 +997,8 @@ radv_AllocateDescriptorSets(VkDevice _device, const VkDescriptorSetAllocateInfo uint32_t i; struct radv_descriptor_set *set = NULL; - const VkDescriptorSetVariableDescriptorCountAllocateInfo *variable_counts = vk_find_struct_const( - pAllocateInfo->pNext, DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO); + const VkDescriptorSetVariableDescriptorCountAllocateInfo *variable_counts = + vk_find_struct_const(pAllocateInfo->pNext, DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO); const uint32_t zero = 0; /* allocate a set of buffers for each shader to contain descriptors */ @@ -1096,9 +1048,8 @@ radv_FreeDescriptorSets(VkDevice _device, VkDescriptorPool descriptorPool, uint3 } static ALWAYS_INLINE void -write_texel_buffer_descriptor(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, - unsigned *dst, struct radeon_winsys_bo **buffer_list, - const VkBufferView _buffer_view) +write_texel_buffer_descriptor(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, unsigned *dst, + struct radeon_winsys_bo **buffer_list, const VkBufferView _buffer_view) { RADV_FROM_HANDLE(radv_buffer_view, buffer_view, _buffer_view); @@ -1128,19 +1079,17 @@ write_buffer_descriptor(struct radv_device *device, unsigned *dst, uint64_t va, return; } - uint32_t rsrc_word3 = - S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); + uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); if (device->physical_device->rad_info.gfx_level >= GFX11) { - rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); + rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); } else if (device->physical_device->rad_info.gfx_level >= GFX10) { - rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1); + rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | + S_008F0C_RESOURCE_LEVEL(1); } else { - rsrc_word3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + rsrc_word3 |= + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); } dst[0] = va; @@ -1150,9 +1099,8 @@ write_buffer_descriptor(struct radv_device *device, unsigned *dst, uint64_t va, } static ALWAYS_INLINE void -write_buffer_descriptor_impl(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, - unsigned *dst, struct radeon_winsys_bo **buffer_list, - const VkDescriptorBufferInfo *buffer_info) +write_buffer_descriptor_impl(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, unsigned *dst, + struct radeon_winsys_bo **buffer_list, const VkDescriptorBufferInfo *buffer_info) { RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer); uint64_t va = 0, range = 0; @@ -1199,8 +1147,7 @@ write_block_descriptor(struct radv_device *device, struct radv_cmd_buffer *cmd_b static 
ALWAYS_INLINE void write_dynamic_buffer_descriptor(struct radv_device *device, struct radv_descriptor_range *range, - struct radeon_winsys_bo **buffer_list, - const VkDescriptorBufferInfo *buffer_info) + struct radeon_winsys_bo **buffer_list, const VkDescriptorBufferInfo *buffer_info) { RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer); uint64_t va; @@ -1256,9 +1203,9 @@ write_image_descriptor(unsigned *dst, unsigned size, VkDescriptorType descriptor } static ALWAYS_INLINE void -write_image_descriptor_impl(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, - unsigned size, unsigned *dst, struct radeon_winsys_bo **buffer_list, - VkDescriptorType descriptor_type, const VkDescriptorImageInfo *image_info) +write_image_descriptor_impl(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, unsigned size, + unsigned *dst, struct radeon_winsys_bo **buffer_list, VkDescriptorType descriptor_type, + const VkDescriptorImageInfo *image_info) { RADV_FROM_HANDLE(radv_image_view, iview, image_info->imageView); @@ -1273,8 +1220,7 @@ write_image_descriptor_impl(struct radv_device *device, struct radv_cmd_buffer * return; } - const uint32_t max_bindings = sizeof(iview->image->bindings) / - sizeof(iview->image->bindings[0]); + const uint32_t max_bindings = sizeof(iview->image->bindings) / sizeof(iview->image->bindings[0]); for (uint32_t b = 0; b < max_bindings; b++) { if (cmd_buffer) { if (iview->image->bindings[b].bo) @@ -1287,14 +1233,12 @@ write_image_descriptor_impl(struct radv_device *device, struct radv_cmd_buffer * } static ALWAYS_INLINE void -write_combined_image_sampler_descriptor(struct radv_device *device, - struct radv_cmd_buffer *cmd_buffer, unsigned sampler_offset, - unsigned *dst, struct radeon_winsys_bo **buffer_list, - VkDescriptorType descriptor_type, - const VkDescriptorImageInfo *image_info, bool has_sampler) +write_combined_image_sampler_descriptor(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, + unsigned sampler_offset, unsigned *dst, struct radeon_winsys_bo **buffer_list, + VkDescriptorType descriptor_type, const VkDescriptorImageInfo *image_info, + bool has_sampler) { - write_image_descriptor_impl(device, cmd_buffer, sampler_offset, dst, buffer_list, descriptor_type, - image_info); + write_image_descriptor_impl(device, cmd_buffer, sampler_offset, dst, buffer_list, descriptor_type, image_info); /* copy over sampler state */ if (has_sampler) { RADV_FROM_HANDLE(radv_sampler, sampler, image_info->sampler); @@ -1324,15 +1268,13 @@ write_accel_struct(struct radv_device *device, void *ptr, VkDeviceAddress va) static ALWAYS_INLINE void radv_update_descriptor_sets_impl(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, VkDescriptorSet dstSetOverride, uint32_t descriptorWriteCount, - const VkWriteDescriptorSet *pDescriptorWrites, - uint32_t descriptorCopyCount, + const VkWriteDescriptorSet *pDescriptorWrites, uint32_t descriptorCopyCount, const VkCopyDescriptorSet *pDescriptorCopies) { uint32_t i, j; for (i = 0; i < descriptorWriteCount; i++) { const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i]; - RADV_FROM_HANDLE(radv_descriptor_set, set, - dstSetOverride ? dstSetOverride : writeset->dstSet); + RADV_FROM_HANDLE(radv_descriptor_set, set, dstSetOverride ? 
dstSetOverride : writeset->dstSet); const struct radv_descriptor_set_binding_layout *binding_layout = set->header.layout->binding + writeset->dstBinding; uint32_t *ptr = set->header.mapped_ptr; @@ -1341,21 +1283,18 @@ radv_update_descriptor_sets_impl(struct radv_device *device, struct radv_cmd_buf * allocated, so if we are writing push descriptors we have to copy the * immutable samplers into them now. */ - const bool copy_immutable_samplers = cmd_buffer && - binding_layout->immutable_samplers_offset && - !binding_layout->immutable_samplers_equal; + const bool copy_immutable_samplers = + cmd_buffer && binding_layout->immutable_samplers_offset && !binding_layout->immutable_samplers_equal; const uint32_t *samplers = radv_immutable_samplers(set->header.layout, binding_layout); const VkWriteDescriptorSetAccelerationStructureKHR *accel_structs = NULL; ptr += binding_layout->offset / 4; if (writeset->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) { - write_block_descriptor(device, cmd_buffer, (uint8_t *)ptr + writeset->dstArrayElement, - writeset); + write_block_descriptor(device, cmd_buffer, (uint8_t *)ptr + writeset->dstArrayElement, writeset); continue; } else if (writeset->descriptorType == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) { - accel_structs = - vk_find_struct_const(writeset->pNext, WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR); + accel_structs = vk_find_struct_const(writeset->pNext, WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR); } ptr += binding_layout->size * writeset->dstArrayElement / 4; @@ -1367,36 +1306,33 @@ radv_update_descriptor_sets_impl(struct radv_device *device, struct radv_cmd_buf case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { unsigned idx = writeset->dstArrayElement + j; idx += binding_layout->dynamic_offset_offset; - assert(!(set->header.layout->flags & - VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)); - write_dynamic_buffer_descriptor(device, set->header.dynamic_descriptors + idx, - buffer_list, writeset->pBufferInfo + j); + assert(!(set->header.layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)); + write_dynamic_buffer_descriptor(device, set->header.dynamic_descriptors + idx, buffer_list, + writeset->pBufferInfo + j); break; } case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - write_buffer_descriptor_impl(device, cmd_buffer, ptr, buffer_list, - writeset->pBufferInfo + j); + write_buffer_descriptor_impl(device, cmd_buffer, ptr, buffer_list, writeset->pBufferInfo + j); break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - write_texel_buffer_descriptor(device, cmd_buffer, ptr, buffer_list, - writeset->pTexelBufferView[j]); + write_texel_buffer_descriptor(device, cmd_buffer, ptr, buffer_list, writeset->pTexelBufferView[j]); break; case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - write_image_descriptor_impl(device, cmd_buffer, 32, ptr, buffer_list, - writeset->descriptorType, writeset->pImageInfo + j); + write_image_descriptor_impl(device, cmd_buffer, 32, ptr, buffer_list, writeset->descriptorType, + writeset->pImageInfo + j); break; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - write_image_descriptor_impl(device, cmd_buffer, 64, ptr, buffer_list, - writeset->descriptorType, writeset->pImageInfo + j); + write_image_descriptor_impl(device, cmd_buffer, 64, ptr, buffer_list, writeset->descriptorType, + writeset->pImageInfo + j); break; case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: { unsigned sampler_offset = 
radv_combined_image_descriptor_sampler_offset(binding_layout); - write_combined_image_sampler_descriptor( - device, cmd_buffer, sampler_offset, ptr, buffer_list, writeset->descriptorType, - writeset->pImageInfo + j, !binding_layout->immutable_samplers_offset); + write_combined_image_sampler_descriptor(device, cmd_buffer, sampler_offset, ptr, buffer_list, + writeset->descriptorType, writeset->pImageInfo + j, + !binding_layout->immutable_samplers_offset); if (copy_immutable_samplers) { const unsigned idx = writeset->dstArrayElement + j; memcpy((char *)ptr + sampler_offset, samplers + 4 * idx, 16); @@ -1413,11 +1349,9 @@ radv_update_descriptor_sets_impl(struct radv_device *device, struct radv_cmd_buf } break; case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: { - RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, - accel_structs->pAccelerationStructures[j]); + RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, accel_structs->pAccelerationStructures[j]); - write_accel_struct(device, ptr, - accel_struct ? vk_acceleration_structure_get_va(accel_struct) : 0); + write_accel_struct(device, ptr, accel_struct ? vk_acceleration_structure_get_va(accel_struct) : 0); break; } default: @@ -1503,32 +1437,29 @@ radv_update_descriptor_sets_impl(struct radv_device *device, struct radv_cmd_buf VKAPI_ATTR void VKAPI_CALL radv_UpdateDescriptorSets(VkDevice _device, uint32_t descriptorWriteCount, - const VkWriteDescriptorSet *pDescriptorWrites, - uint32_t descriptorCopyCount, + const VkWriteDescriptorSet *pDescriptorWrites, uint32_t descriptorCopyCount, const VkCopyDescriptorSet *pDescriptorCopies) { RADV_FROM_HANDLE(radv_device, device, _device); - radv_update_descriptor_sets_impl(device, NULL, VK_NULL_HANDLE, descriptorWriteCount, - pDescriptorWrites, descriptorCopyCount, pDescriptorCopies); + radv_update_descriptor_sets_impl(device, NULL, VK_NULL_HANDLE, descriptorWriteCount, pDescriptorWrites, + descriptorCopyCount, pDescriptorCopies); } void radv_cmd_update_descriptor_sets(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, VkDescriptorSet dstSetOverride, uint32_t descriptorWriteCount, - const VkWriteDescriptorSet *pDescriptorWrites, - uint32_t descriptorCopyCount, + const VkWriteDescriptorSet *pDescriptorWrites, uint32_t descriptorCopyCount, const VkCopyDescriptorSet *pDescriptorCopies) { /* Assume cmd_buffer != NULL to optimize out cmd_buffer checks in generic code above. 
*/ assume(cmd_buffer != NULL); - radv_update_descriptor_sets_impl(device, cmd_buffer, dstSetOverride, descriptorWriteCount, - pDescriptorWrites, descriptorCopyCount, pDescriptorCopies); + radv_update_descriptor_sets_impl(device, cmd_buffer, dstSetOverride, descriptorWriteCount, pDescriptorWrites, + descriptorCopyCount, pDescriptorCopies); } VKAPI_ATTR VkResult VKAPI_CALL -radv_CreateDescriptorUpdateTemplate(VkDevice _device, - const VkDescriptorUpdateTemplateCreateInfo *pCreateInfo, +radv_CreateDescriptorUpdateTemplate(VkDevice _device, const VkDescriptorUpdateTemplateCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkDescriptorUpdateTemplate *pDescriptorUpdateTemplate) { @@ -1565,8 +1496,7 @@ radv_CreateDescriptorUpdateTemplate(VkDevice _device, for (i = 0; i < entry_count; i++) { const VkDescriptorUpdateTemplateEntry *entry = &pCreateInfo->pDescriptorUpdateEntries[i]; - const struct radv_descriptor_set_binding_layout *binding_layout = - set_layout->binding + entry->dstBinding; + const struct radv_descriptor_set_binding_layout *binding_layout = set_layout->binding + entry->dstBinding; const uint32_t buffer_offset = binding_layout->buffer_offset + entry->dstArrayElement; const uint32_t *immutable_samplers = NULL; uint32_t dst_offset; @@ -1586,12 +1516,9 @@ radv_CreateDescriptorUpdateTemplate(VkDevice _device, case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: case VK_DESCRIPTOR_TYPE_SAMPLER: /* Immutable samplers are copied into push descriptors when they are pushed */ - if (pCreateInfo->templateType == - VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR && - binding_layout->immutable_samplers_offset && - !binding_layout->immutable_samplers_equal) { - immutable_samplers = - radv_immutable_samplers(set_layout, binding_layout) + entry->dstArrayElement * 4; + if (pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR && + binding_layout->immutable_samplers_offset && !binding_layout->immutable_samplers_equal) { + immutable_samplers = radv_immutable_samplers(set_layout, binding_layout) + entry->dstArrayElement * 4; } break; default: @@ -1625,8 +1552,7 @@ radv_CreateDescriptorUpdateTemplate(VkDevice _device, } VKAPI_ATTR void VKAPI_CALL -radv_DestroyDescriptorUpdateTemplate(VkDevice _device, - VkDescriptorUpdateTemplate descriptorUpdateTemplate, +radv_DestroyDescriptorUpdateTemplate(VkDevice _device, VkDescriptorUpdateTemplate descriptorUpdateTemplate, const VkAllocationCallbacks *pAllocator) { RADV_FROM_HANDLE(radv_device, device, _device); @@ -1640,11 +1566,9 @@ radv_DestroyDescriptorUpdateTemplate(VkDevice _device, } static ALWAYS_INLINE void -radv_update_descriptor_set_with_template_impl(struct radv_device *device, - struct radv_cmd_buffer *cmd_buffer, +radv_update_descriptor_set_with_template_impl(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, struct radv_descriptor_set *set, - VkDescriptorUpdateTemplate descriptorUpdateTemplate, - const void *pData) + VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData) { RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate); uint32_t i; @@ -1665,56 +1589,46 @@ radv_update_descriptor_set_with_template_impl(struct radv_device *device, case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { const unsigned idx = templ->entry[i].dst_offset + j; - assert(!(set->header.layout->flags & - VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)); - write_dynamic_buffer_descriptor(device, 
set->header.dynamic_descriptors + idx, - buffer_list, (struct VkDescriptorBufferInfo *)pSrc); + assert(!(set->header.layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)); + write_dynamic_buffer_descriptor(device, set->header.dynamic_descriptors + idx, buffer_list, + (struct VkDescriptorBufferInfo *)pSrc); break; } case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - write_buffer_descriptor_impl(device, cmd_buffer, pDst, buffer_list, - (struct VkDescriptorBufferInfo *)pSrc); + write_buffer_descriptor_impl(device, cmd_buffer, pDst, buffer_list, (struct VkDescriptorBufferInfo *)pSrc); break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - write_texel_buffer_descriptor(device, cmd_buffer, pDst, buffer_list, - *(VkBufferView *)pSrc); + write_texel_buffer_descriptor(device, cmd_buffer, pDst, buffer_list, *(VkBufferView *)pSrc); break; case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - write_image_descriptor_impl(device, cmd_buffer, 32, pDst, buffer_list, - templ->entry[i].descriptor_type, + write_image_descriptor_impl(device, cmd_buffer, 32, pDst, buffer_list, templ->entry[i].descriptor_type, (struct VkDescriptorImageInfo *)pSrc); break; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - write_image_descriptor_impl(device, cmd_buffer, 64, pDst, buffer_list, - templ->entry[i].descriptor_type, + write_image_descriptor_impl(device, cmd_buffer, 64, pDst, buffer_list, templ->entry[i].descriptor_type, (struct VkDescriptorImageInfo *)pSrc); break; case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - write_combined_image_sampler_descriptor( - device, cmd_buffer, templ->entry[i].sampler_offset, pDst, buffer_list, - templ->entry[i].descriptor_type, (struct VkDescriptorImageInfo *)pSrc, - templ->entry[i].has_sampler); + write_combined_image_sampler_descriptor(device, cmd_buffer, templ->entry[i].sampler_offset, pDst, + buffer_list, templ->entry[i].descriptor_type, + (struct VkDescriptorImageInfo *)pSrc, templ->entry[i].has_sampler); if (cmd_buffer && templ->entry[i].immutable_samplers) { - memcpy((char *)pDst + templ->entry[i].sampler_offset, - templ->entry[i].immutable_samplers + 4 * j, 16); + memcpy((char *)pDst + templ->entry[i].sampler_offset, templ->entry[i].immutable_samplers + 4 * j, 16); } break; case VK_DESCRIPTOR_TYPE_SAMPLER: if (templ->entry[i].has_sampler) { const VkDescriptorImageInfo *pImageInfo = (struct VkDescriptorImageInfo *)pSrc; write_sampler_descriptor(pDst, pImageInfo->sampler); - } - else if (cmd_buffer && templ->entry[i].immutable_samplers) + } else if (cmd_buffer && templ->entry[i].immutable_samplers) memcpy(pDst, templ->entry[i].immutable_samplers + 4 * j, 16); break; case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: { - RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, - *(const VkAccelerationStructureKHR *)pSrc); - write_accel_struct(device, pDst, - accel_struct ? vk_acceleration_structure_get_va(accel_struct) : 0); + RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, *(const VkAccelerationStructureKHR *)pSrc); + write_accel_struct(device, pDst, accel_struct ? 
vk_acceleration_structure_get_va(accel_struct) : 0); break; } default: @@ -1728,11 +1642,9 @@ radv_update_descriptor_set_with_template_impl(struct radv_device *device, } void -radv_cmd_update_descriptor_set_with_template(struct radv_device *device, - struct radv_cmd_buffer *cmd_buffer, +radv_cmd_update_descriptor_set_with_template(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, struct radv_descriptor_set *set, - VkDescriptorUpdateTemplate descriptorUpdateTemplate, - const void *pData) + VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData) { /* Assume cmd_buffer != NULL to optimize out cmd_buffer checks in generic code above. */ assume(cmd_buffer != NULL); @@ -1741,8 +1653,7 @@ radv_cmd_update_descriptor_set_with_template(struct radv_device *device, VKAPI_ATTR void VKAPI_CALL radv_UpdateDescriptorSetWithTemplate(VkDevice _device, VkDescriptorSet descriptorSet, - VkDescriptorUpdateTemplate descriptorUpdateTemplate, - const void *pData) + VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData) { RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_descriptor_set, set, descriptorSet); @@ -1751,23 +1662,21 @@ radv_UpdateDescriptorSetWithTemplate(VkDevice _device, VkDescriptorSet descripto } VKAPI_ATTR void VKAPI_CALL -radv_GetDescriptorSetLayoutHostMappingInfoVALVE( - VkDevice _device, const VkDescriptorSetBindingReferenceVALVE *pBindingReference, - VkDescriptorSetLayoutHostMappingInfoVALVE *pHostMapping) +radv_GetDescriptorSetLayoutHostMappingInfoVALVE(VkDevice _device, + const VkDescriptorSetBindingReferenceVALVE *pBindingReference, + VkDescriptorSetLayoutHostMappingInfoVALVE *pHostMapping) { struct radv_descriptor_set_layout *set_layout = radv_descriptor_set_layout_from_handle(pBindingReference->descriptorSetLayout); - const struct radv_descriptor_set_binding_layout *binding_layout = - set_layout->binding + pBindingReference->binding; + const struct radv_descriptor_set_binding_layout *binding_layout = set_layout->binding + pBindingReference->binding; pHostMapping->descriptorOffset = binding_layout->offset; pHostMapping->descriptorSize = binding_layout->size; } VKAPI_ATTR void VKAPI_CALL -radv_GetDescriptorSetHostMappingVALVE(VkDevice _device, VkDescriptorSet descriptorSet, - void **ppData) +radv_GetDescriptorSetHostMappingVALVE(VkDevice _device, VkDescriptorSet descriptorSet, void **ppData) { RADV_FROM_HANDLE(radv_descriptor_set, set, descriptorSet); *ppData = set->header.mapped_ptr; @@ -1775,24 +1684,23 @@ radv_GetDescriptorSetHostMappingVALVE(VkDevice _device, VkDescriptorSet descript /* VK_EXT_descriptor_buffer */ VKAPI_ATTR void VKAPI_CALL -radv_GetDescriptorSetLayoutSizeEXT(VkDevice device, VkDescriptorSetLayout layout, - VkDeviceSize *pLayoutSizeInBytes) +radv_GetDescriptorSetLayoutSizeEXT(VkDevice device, VkDescriptorSetLayout layout, VkDeviceSize *pLayoutSizeInBytes) { RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, layout); *pLayoutSizeInBytes = set_layout->size; } VKAPI_ATTR void VKAPI_CALL -radv_GetDescriptorSetLayoutBindingOffsetEXT(VkDevice device, VkDescriptorSetLayout layout, - uint32_t binding, VkDeviceSize *pOffset) +radv_GetDescriptorSetLayoutBindingOffsetEXT(VkDevice device, VkDescriptorSetLayout layout, uint32_t binding, + VkDeviceSize *pOffset) { RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, layout); *pOffset = set_layout->binding[binding].offset; } VKAPI_ATTR void VKAPI_CALL -radv_GetDescriptorEXT(VkDevice _device, const VkDescriptorGetInfoEXT *pDescriptorInfo, - size_t dataSize, 
void *pDescriptor) +radv_GetDescriptorEXT(VkDevice _device, const VkDescriptorGetInfoEXT *pDescriptorInfo, size_t dataSize, + void *pDescriptor) { RADV_FROM_HANDLE(radv_device, device, _device); @@ -1801,24 +1709,19 @@ radv_GetDescriptorEXT(VkDevice _device, const VkDescriptorGetInfoEXT *pDescripto write_sampler_descriptor(pDescriptor, *pDescriptorInfo->data.pSampler); break; case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - write_image_descriptor(pDescriptor, 64, pDescriptorInfo->type, - pDescriptorInfo->data.pCombinedImageSampler); + write_image_descriptor(pDescriptor, 64, pDescriptorInfo->type, pDescriptorInfo->data.pCombinedImageSampler); if (pDescriptorInfo->data.pCombinedImageSampler) { - write_sampler_descriptor((uint32_t *)pDescriptor + 20, - pDescriptorInfo->data.pCombinedImageSampler->sampler); + write_sampler_descriptor((uint32_t *)pDescriptor + 20, pDescriptorInfo->data.pCombinedImageSampler->sampler); } break; case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - write_image_descriptor(pDescriptor, 64, pDescriptorInfo->type, - pDescriptorInfo->data.pInputAttachmentImage); + write_image_descriptor(pDescriptor, 64, pDescriptorInfo->type, pDescriptorInfo->data.pInputAttachmentImage); break; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - write_image_descriptor(pDescriptor, 64, pDescriptorInfo->type, - pDescriptorInfo->data.pSampledImage); + write_image_descriptor(pDescriptor, 64, pDescriptorInfo->type, pDescriptorInfo->data.pSampledImage); break; case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - write_image_descriptor(pDescriptor, 32, pDescriptorInfo->type, - pDescriptorInfo->data.pStorageImage); + write_image_descriptor(pDescriptor, 32, pDescriptorInfo->type, pDescriptorInfo->data.pStorageImage); break; case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: { const VkDescriptorAddressInfoEXT *addr_info = pDescriptorInfo->data.pUniformBuffer; @@ -1838,8 +1741,8 @@ radv_GetDescriptorEXT(VkDevice _device, const VkDescriptorGetInfoEXT *pDescripto const VkDescriptorAddressInfoEXT *addr_info = pDescriptorInfo->data.pUniformTexelBuffer; if (addr_info && addr_info->address) { - radv_make_texel_buffer_descriptor(device, addr_info->address, addr_info->format, 0, - addr_info->range, pDescriptor); + radv_make_texel_buffer_descriptor(device, addr_info->address, addr_info->format, 0, addr_info->range, + pDescriptor); } else { memset(pDescriptor, 0, 4 * 4); } @@ -1849,8 +1752,8 @@ radv_GetDescriptorEXT(VkDevice _device, const VkDescriptorGetInfoEXT *pDescripto const VkDescriptorAddressInfoEXT *addr_info = pDescriptorInfo->data.pStorageTexelBuffer; if (addr_info && addr_info->address) { - radv_make_texel_buffer_descriptor(device, addr_info->address, addr_info->format, 0, - addr_info->range, pDescriptor); + radv_make_texel_buffer_descriptor(device, addr_info->address, addr_info->format, 0, addr_info->range, + pDescriptor); } else { memset(pDescriptor, 0, 4 * 4); } diff --git a/src/amd/vulkan/radv_descriptor_set.h b/src/amd/vulkan/radv_descriptor_set.h index 77a3b3c..19249bf 100644 --- a/src/amd/vulkan/radv_descriptor_set.h +++ b/src/amd/vulkan/radv_descriptor_set.h @@ -114,8 +114,7 @@ radv_immutable_samplers(const struct radv_descriptor_set_layout *set, } static inline unsigned -radv_combined_image_descriptor_sampler_offset( - const struct radv_descriptor_set_binding_layout *binding) +radv_combined_image_descriptor_sampler_offset(const struct radv_descriptor_set_binding_layout *binding) { return binding->size - ((!binding->immutable_samplers_equal) ? 
16 : 0); } @@ -126,8 +125,7 @@ radv_immutable_ycbcr_samplers(const struct radv_descriptor_set_layout *set, unsi if (!set->ycbcr_sampler_offsets_offset) return NULL; - const uint32_t *offsets = - (const uint32_t *)((const char *)set + set->ycbcr_sampler_offsets_offset); + const uint32_t *offsets = (const uint32_t *)((const char *)set + set->ycbcr_sampler_offsets_offset); if (offsets[binding_index] == 0) return NULL; @@ -136,8 +134,7 @@ radv_immutable_ycbcr_samplers(const struct radv_descriptor_set_layout *set, unsi struct radv_device; -void radv_pipeline_layout_init(struct radv_device *device, struct radv_pipeline_layout *layout, - bool independent_sets); +void radv_pipeline_layout_init(struct radv_device *device, struct radv_pipeline_layout *layout, bool independent_sets); void radv_pipeline_layout_add_set(struct radv_pipeline_layout *layout, uint32_t set_idx, struct radv_descriptor_set_layout *set_layout); void radv_pipeline_layout_hash(struct radv_pipeline_layout *layout); diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 86ff186..e4c6100 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -68,6 +68,7 @@ typedef void *drmDevicePtr; #include "util/os_time.h" #include "util/timespec.h" #include "util/u_atomic.h" +#include "vulkan/vk_icd.h" #include "winsys/null/radv_null_winsys_public.h" #include "git_sha1.h" #include "sid.h" @@ -75,7 +76,6 @@ typedef void *drmDevicePtr; #include "vk_format.h" #include "vk_sync.h" #include "vk_sync_dummy.h" -#include "vulkan/vk_icd.h" #ifdef LLVM_AVAILABLE #include "ac_llvm_util.h" @@ -88,9 +88,9 @@ radv_spm_trace_enabled() } VKAPI_ATTR VkResult VKAPI_CALL -radv_GetMemoryHostPointerPropertiesEXT( - VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType, const void *pHostPointer, - VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties) +radv_GetMemoryHostPointerPropertiesEXT(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType, + const void *pHostPointer, + VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties) { RADV_FROM_HANDLE(radv_device, device, _device); @@ -118,10 +118,10 @@ radv_device_init_border_color(struct radv_device *device) { VkResult result; - result = device->ws->buffer_create( - device->ws, RADV_BORDER_COLOR_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM, - RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_NO_INTERPROCESS_SHARING, - RADV_BO_PRIORITY_SHADER, 0, &device->border_color_data.bo); + result = + device->ws->buffer_create(device->ws, RADV_BORDER_COLOR_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM, + RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_NO_INTERPROCESS_SHARING, + RADV_BO_PRIORITY_SHADER, 0, &device->border_color_data.bo); if (result != VK_SUCCESS) return vk_error(device, result); @@ -207,8 +207,7 @@ radv_device_init_vs_prologs(struct radv_device *device) if (!prolog) return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); - assert(idx == - radv_instance_rate_prolog_index(num_attributes, state.instance_rate_inputs)); + assert(idx == radv_instance_rate_prolog_index(num_attributes, state.instance_rate_inputs)); device->instance_rate_vs_prologs[idx++] = prolog; } } @@ -222,8 +221,7 @@ static void radv_device_finish_vs_prologs(struct radv_device *device) { if (device->vs_prologs) { - hash_table_foreach(device->vs_prologs, entry) - { + hash_table_foreach (device->vs_prologs, entry) { free((void *)entry->key); radv_shader_part_unref(device, entry->data); } @@ -261,8 +259,7 @@ static void 
radv_device_finish_ps_epilogs(struct radv_device *device) { if (device->ps_epilogs) { - hash_table_foreach(device->ps_epilogs, entry) - { + hash_table_foreach (device->ps_epilogs, entry) { free((void *)entry->key); radv_shader_part_unref(device, entry->data); } @@ -294,9 +291,9 @@ radv_device_init_vrs_state(struct radv_device *device) .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, }; - result = radv_image_create(radv_device_to_handle(device), - &(struct radv_image_create_info){.vk_info = &image_create_info}, - &device->meta_state.alloc, &image, true); + result = + radv_image_create(radv_device_to_handle(device), &(struct radv_image_create_info){.vk_info = &image_create_info}, + &device->meta_state.alloc, &image, true); if (result != VK_SUCCESS) return result; @@ -307,8 +304,7 @@ radv_device_init_vrs_state(struct radv_device *device) .sharingMode = VK_SHARING_MODE_EXCLUSIVE, }; - result = - radv_create_buffer(device, &buffer_create_info, &device->meta_state.alloc, &buffer, true); + result = radv_create_buffer(device, &buffer_create_info, &device->meta_state.alloc, &buffer, true); if (result != VK_SUCCESS) goto fail_create; @@ -330,12 +326,10 @@ radv_device_init_vrs_state(struct radv_device *device) if (result != VK_SUCCESS) goto fail_alloc; - VkBindBufferMemoryInfo bind_info = { - .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO, - .buffer = buffer, - .memory = mem, - .memoryOffset = 0 - }; + VkBindBufferMemoryInfo bind_info = {.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO, + .buffer = buffer, + .memory = mem, + .memoryOffset = 0}; result = radv_BindBufferMemory2(radv_device_to_handle(device), 1, &bind_info); if (result != VK_SUCCESS) @@ -366,9 +360,8 @@ radv_device_finish_vrs_image(struct radv_device *device) radv_FreeMemory(radv_device_to_handle(device), radv_device_memory_to_handle(device->vrs.mem), &device->meta_state.alloc); radv_DestroyBuffer(radv_device_to_handle(device), radv_buffer_to_handle(device->vrs.buffer), - &device->meta_state.alloc); - radv_DestroyImage(radv_device_to_handle(device), radv_image_to_handle(device->vrs.image), - &device->meta_state.alloc); + &device->meta_state.alloc); + radv_DestroyImage(radv_device_to_handle(device), radv_image_to_handle(device->vrs.image), &device->meta_state.alloc); } static enum radv_force_vrs @@ -429,7 +422,7 @@ radv_notifier_thread_run(void *data) while (!notifier->quit) { const char *file = radv_get_force_vrs_config_file(); - struct timespec tm = { .tv_nsec = 100000000 }; /* 1OOms */ + struct timespec tm = {.tv_nsec = 100000000}; /* 1OOms */ int length, i = 0; length = read(notifier->fd, buf, BUF_LEN); @@ -529,8 +522,7 @@ struct dispatch_table_builder { }; static void -add_entrypoints(struct dispatch_table_builder *b, - const struct vk_device_entrypoint_table *entrypoints, +add_entrypoints(struct dispatch_table_builder *b, const struct vk_device_entrypoint_table *entrypoints, enum radv_dispatch_table table) { for (int32_t i = table - 1; i >= RADV_DEVICE_DISPATCH_TABLE; i--) { @@ -632,8 +624,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr robust_buffer_access = true; } - vk_foreach_struct_const(ext, pCreateInfo->pNext) - { + vk_foreach_struct_const (ext, pCreateInfo->pNext) { switch (ext->sType) { case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: { const VkPhysicalDeviceFeatures2 *features = (const void *)ext; @@ -643,14 +634,12 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr } case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: { const 
VkDeviceMemoryOverallocationCreateInfoAMD *overallocation = (const void *)ext; - if (overallocation->overallocationBehavior == - VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD) + if (overallocation->overallocationBehavior == VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD) overallocation_disallowed = true; break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: { - const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features = - (const void *)ext; + const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features = (const void *)ext; custom_border_colors = border_color_features->customBorderColors; break; } @@ -667,15 +656,13 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: { const VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features = (const void *)ext; - if (features->shaderImageFloat32Atomics || - features->sparseImageFloat32Atomics) + if (features->shaderImageFloat32Atomics || features->sparseImageFloat32Atomics) image_float32_atomics = true; break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_2_FEATURES_EXT: { const VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *features = (const void *)ext; - if (features->shaderImageFloat32AtomicMinMax || - features->sparseImageFloat32AtomicMinMax) + if (features->shaderImageFloat32AtomicMinMax || features->sparseImageFloat32AtomicMinMax) image_float32_atomics = true; break; } @@ -699,8 +686,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVES_GENERATED_QUERY_FEATURES_EXT: { const VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *features = (const void *)ext; - if (features->primitivesGeneratedQuery || - features->primitivesGeneratedQueryWithRasterizerDiscard || + if (features->primitivesGeneratedQuery || features->primitivesGeneratedQueryWithRasterizerDiscard || features->primitivesGeneratedQueryWithNonZeroStreams) primitives_generated_query = true; break; @@ -725,10 +711,8 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_FEATURES_EXT: { const VkPhysicalDeviceExtendedDynamicState3FeaturesEXT *features = (const void *)ext; - if (features->extendedDynamicState3ColorBlendEnable || - features->extendedDynamicState3ColorWriteMask || - features->extendedDynamicState3AlphaToCoverageEnable || - features->extendedDynamicState3ColorBlendEquation) + if (features->extendedDynamicState3ColorBlendEnable || features->extendedDynamicState3ColorWriteMask || + features->extendedDynamicState3AlphaToCoverageEnable || features->extendedDynamicState3ColorBlendEquation) ps_epilogs = true; break; } @@ -773,8 +757,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr /* With update after bind we can't attach bo's to the command buffer * from the descriptor set anymore, so we have to use a global BO list. 
*/ - device->use_global_bo_list = global_bo_list || - (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) || + device->use_global_bo_list = global_bo_list || (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) || device->vk.enabled_extensions.EXT_descriptor_indexing || device->vk.enabled_extensions.EXT_buffer_device_address || device->vk.enabled_extensions.KHR_buffer_device_address || @@ -825,9 +808,8 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr const VkDeviceQueueGlobalPriorityCreateInfoKHR *global_priority = vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR); - device->queues[qfi] = - vk_alloc(&device->vk.alloc, queue_create->queueCount * sizeof(struct radv_queue), 8, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + device->queues[qfi] = vk_alloc(&device->vk.alloc, queue_create->queueCount * sizeof(struct radv_queue), 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); if (!device->queues[qfi]) { result = VK_ERROR_OUT_OF_HOST_MEMORY; goto fail_queue; @@ -845,16 +827,15 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr } device->private_sdma_queue = VK_NULL_HANDLE; - device->shader_use_invisible_vram = - (device->instance->perftest_flags & RADV_PERFTEST_DMA_SHADERS) && - /* SDMA buffer copy is only implemented for GFX7+. */ - device->physical_device->rad_info.gfx_level >= GFX7; + device->shader_use_invisible_vram = (device->instance->perftest_flags & RADV_PERFTEST_DMA_SHADERS) && + /* SDMA buffer copy is only implemented for GFX7+. */ + device->physical_device->rad_info.gfx_level >= GFX7; result = radv_init_shader_upload_queue(device); if (result != VK_SUCCESS) goto fail; - device->pbb_allowed = device->physical_device->rad_info.gfx_level >= GFX9 && - !(device->instance->debug_flags & RADV_DEBUG_NOBINNING); + device->pbb_allowed = + device->physical_device->rad_info.gfx_level >= GFX9 && !(device->instance->debug_flags & RADV_DEBUG_NOBINNING); /* The maximum number of scratch waves. Scratch space isn't divided * evenly between CUs. The number is only a function of the number of CUs. @@ -869,8 +850,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr * async compute). I've seen ~2% performance difference between 4 and 32. */ uint32_t max_threads_per_block = 2048; - device->scratch_waves = - MAX2(32 * physical_device->rad_info.num_cu, max_threads_per_block / 64); + device->scratch_waves = MAX2(32 * physical_device->rad_info.num_cu, max_threads_per_block / 64); device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1); @@ -885,8 +865,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr * The kernel may not support preemption, but PAL always sets this bit, * so let's also set it here for consistency. */ - device->dispatch_initiator_task = - device->dispatch_initiator | S_00B800_DISABLE_DISP_PREMPT_EN(1); + device->dispatch_initiator_task = device->dispatch_initiator | S_00B800_DISABLE_DISP_PREMPT_EN(1); if (device->instance->debug_flags & RADV_DEBUG_HANG) { /* Enable GPU hangs detection and dump logs if a GPU hang is @@ -899,12 +878,9 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr goto fail; } - fprintf(stderr, - "*****************************************************************************\n"); - fprintf(stderr, - "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! 
*\n"); - fprintf(stderr, - "*****************************************************************************\n"); + fprintf(stderr, "*****************************************************************************\n"); + fprintf(stderr, "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n"); + fprintf(stderr, "*****************************************************************************\n"); /* Wait for idle after every draw/dispatch to identify the * first bad call. @@ -915,8 +891,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr } if (radv_sqtt_enabled()) { - if (device->physical_device->rad_info.gfx_level < GFX8 || - device->physical_device->rad_info.gfx_level > GFX11) { + if (device->physical_device->rad_info.gfx_level < GFX8 || device->physical_device->rad_info.gfx_level > GFX11) { fprintf(stderr, "GPU hardware not supported: refer to " "the RGP documentation for the list of " "supported GPUs!\n"); @@ -931,8 +906,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr fprintf(stderr, "radv: Thread trace support is enabled (initial buffer size: %u MiB, " "instruction timing: %s, cache counters: %s).\n", - device->sqtt.buffer_size / (1024 * 1024), - radv_is_instruction_timing_enabled() ? "enabled" : "disabled", + device->sqtt.buffer_size / (1024 * 1024), radv_is_instruction_timing_enabled() ? "enabled" : "disabled", radv_spm_trace_enabled() ? "enabled" : "disabled"); if (radv_spm_trace_enabled()) { @@ -944,8 +918,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr goto fail; } } else { - fprintf(stderr, "radv: SPM isn't supported for this GPU (%s)!\n", - device->physical_device->name); + fprintf(stderr, "radv: SPM isn't supported for this GPU (%s)!\n", device->physical_device->name); } } } @@ -1037,21 +1010,18 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr device->force_aniso = MIN2(16, (int)debug_get_num_option("RADV_TEX_ANISO", -1)); if (device->force_aniso >= 0) { - fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n", - 1 << util_logbase2(device->force_aniso)); + fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n", 1 << util_logbase2(device->force_aniso)); } if (use_perf_counters) { size_t bo_size = PERF_CTR_BO_PASS_OFFSET + sizeof(uint64_t) * PERF_CTR_MAX_PASSES; - result = - device->ws->buffer_create(device->ws, bo_size, 4096, RADEON_DOMAIN_GTT, - RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING, - RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->perf_counter_bo); + result = device->ws->buffer_create(device->ws, bo_size, 4096, RADEON_DOMAIN_GTT, + RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING, + RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->perf_counter_bo); if (result != VK_SUCCESS) goto fail_cache; - device->perf_counter_lock_cs = - calloc(sizeof(struct radeon_winsys_cs *), 2 * PERF_CTR_MAX_PASSES); + device->perf_counter_lock_cs = calloc(sizeof(struct radeon_winsys_cs *), 2 * PERF_CTR_MAX_PASSES); if (!device->perf_counter_lock_cs) { result = VK_ERROR_OUT_OF_HOST_MEMORY; goto fail_cache; @@ -1216,13 +1186,11 @@ radv_GetImageMemoryRequirements2(VkDevice _device, const VkImageMemoryRequiremen pMemoryRequirements->memoryRequirements.size = image->size; pMemoryRequirements->memoryRequirements.alignment = image->alignment; - vk_foreach_struct(ext, pMemoryRequirements->pNext) - { + vk_foreach_struct (ext, pMemoryRequirements->pNext) { switch (ext->sType) { case 
VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext; - req->requiresDedicatedAllocation = - image->shareable && image->vk.tiling != VK_IMAGE_TILING_LINEAR; + req->requiresDedicatedAllocation = image->shareable && image->vk.tiling != VK_IMAGE_TILING_LINEAR; req->prefersDedicatedAllocation = req->requiresDedicatedAllocation; break; } @@ -1233,8 +1201,7 @@ radv_GetImageMemoryRequirements2(VkDevice _device, const VkImageMemoryRequiremen } VKAPI_ATTR void VKAPI_CALL -radv_GetDeviceImageMemoryRequirements(VkDevice device, - const VkDeviceImageMemoryRequirements *pInfo, +radv_GetDeviceImageMemoryRequirements(VkDevice device, const VkDeviceImageMemoryRequirements *pInfo, VkMemoryRequirements2 *pMemoryRequirements) { UNUSED VkResult result; @@ -1244,8 +1211,8 @@ radv_GetDeviceImageMemoryRequirements(VkDevice device, * creating an image. * TODO: Avoid creating an image. */ - result = radv_image_create( - device, &(struct radv_image_create_info){.vk_info = pInfo->pCreateInfo}, NULL, &image, true); + result = + radv_image_create(device, &(struct radv_image_create_info){.vk_info = pInfo->pCreateInfo}, NULL, &image, true); assert(result == VK_SUCCESS); VkImageMemoryRequirementsInfo2 info2 = { @@ -1259,8 +1226,7 @@ radv_GetDeviceImageMemoryRequirements(VkDevice device, } VKAPI_ATTR VkResult VKAPI_CALL -radv_BindImageMemory2(VkDevice _device, uint32_t bindInfoCount, - const VkBindImageMemoryInfo *pBindInfos) +radv_BindImageMemory2(VkDevice _device, uint32_t bindInfoCount, const VkBindImageMemoryInfo *pBindInfos) { RADV_FROM_HANDLE(radv_device, device, _device); @@ -1275,8 +1241,7 @@ radv_BindImageMemory2(VkDevice _device, uint32_t bindInfoCount, if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) { struct radv_image *swapchain_img = - radv_image_from_handle(wsi_common_get_image( - swapchain_info->swapchain, swapchain_info->imageIndex)); + radv_image_from_handle(wsi_common_get_image(swapchain_info->swapchain, swapchain_info->imageIndex)); image->bindings[0].bo = swapchain_img->bindings[0].bo; image->bindings[0].offset = swapchain_img->bindings[0].offset; @@ -1296,8 +1261,7 @@ radv_BindImageMemory2(VkDevice _device, uint32_t bindInfoCount, radv_GetImageMemoryRequirements2(_device, &info, &reqs); if (pBindInfos[i].memoryOffset + reqs.memoryRequirements.size > mem->alloc_size) { - return vk_errorf(device, VK_ERROR_UNKNOWN, - "Device memory object too small for the image.\n"); + return vk_errorf(device, VK_ERROR_UNKNOWN, "Device memory object too small for the image.\n"); } } @@ -1306,20 +1270,20 @@ radv_BindImageMemory2(VkDevice _device, uint32_t bindInfoCount, vk_find_struct_const(pBindInfos[i].pNext, BIND_IMAGE_PLANE_MEMORY_INFO); switch (plane_info->planeAspect) { - case VK_IMAGE_ASPECT_PLANE_0_BIT: - image->bindings[0].bo = mem->bo; - image->bindings[0].offset = pBindInfos[i].memoryOffset; - break; - case VK_IMAGE_ASPECT_PLANE_1_BIT: - image->bindings[1].bo = mem->bo; - image->bindings[1].offset = pBindInfos[i].memoryOffset; - break; - case VK_IMAGE_ASPECT_PLANE_2_BIT: - image->bindings[2].bo = mem->bo; - image->bindings[2].offset = pBindInfos[i].memoryOffset; - break; - default: - break; + case VK_IMAGE_ASPECT_PLANE_0_BIT: + image->bindings[0].bo = mem->bo; + image->bindings[0].offset = pBindInfos[i].memoryOffset; + break; + case VK_IMAGE_ASPECT_PLANE_1_BIT: + image->bindings[1].bo = mem->bo; + image->bindings[1].offset = pBindInfos[i].memoryOffset; + break; + case VK_IMAGE_ASPECT_PLANE_2_BIT: + image->bindings[2].bo 
= mem->bo; + image->bindings[2].offset = pBindInfos[i].memoryOffset; + break; + default: + break; } } else { image->bindings[0].bo = mem->bo; @@ -1347,8 +1311,7 @@ radv_surface_max_layer_count(struct radv_image_view *iview) } static unsigned -get_dcc_max_uncompressed_block_size(const struct radv_device *device, - const struct radv_image_view *iview) +get_dcc_max_uncompressed_block_size(const struct radv_device *device, const struct radv_image_view *iview) { if (device->physical_device->rad_info.gfx_level < GFX10 && iview->image->vk.samples > 1) { if (iview->image->planes[0].surface.bpe == 1) @@ -1390,15 +1353,14 @@ radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iv /* For GFX9+ ac_surface computes values for us (except min_compressed * and max_uncompressed) */ if (device->physical_device->rad_info.gfx_level >= GFX9) { - max_compressed_block_size = - iview->image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size; + max_compressed_block_size = iview->image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size; independent_128b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_128B_blocks; independent_64b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_64B_blocks; } else { independent_128b_blocks = 0; - if (iview->image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) { + if (iview->image->vk.usage & + (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) { /* If this DCC image is potentially going to be used in texture * fetches, we need some special settings. */ @@ -1453,8 +1415,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX6(desc->swizzle[3] == PIPE_SWIZZLE_1); uint32_t plane_id = iview->image->disjoint ? 
iview->plane_id : 0; - va = radv_buffer_get_va(iview->image->bindings[plane_id].bo) + - iview->image->bindings[plane_id].offset; + va = radv_buffer_get_va(iview->image->bindings[plane_id].bo) + iview->image->bindings[plane_id].offset; if (iview->nbc_view.valid) { va += iview->nbc_view.base_address_offset; @@ -1483,8 +1444,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) | S_028C74_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) | - S_028C74_RB_ALIGNED(meta.rb_aligned) | - S_028C74_PIPE_ALIGNED(meta.pipe_aligned); + S_028C74_RB_ALIGNED(meta.rb_aligned) | S_028C74_PIPE_ALIGNED(meta.pipe_aligned); cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.epitch); } @@ -1510,8 +1470,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff if (radv_image_has_fmask(iview->image)) { if (device->physical_device->rad_info.gfx_level >= GFX7) - cb->cb_color_pitch |= - S_028C64_FMASK_TILE_MAX(surf->u.legacy.color.fmask.pitch_in_pixels / 8 - 1); + cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.color.fmask.pitch_in_pixels / 8 - 1); cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.color.fmask.tiling_index); cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.color.fmask.slice_tile_max); } else { @@ -1531,8 +1490,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset; va += surf->meta_offset; - if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && - device->physical_device->rad_info.gfx_level <= GFX8) + if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && device->physical_device->rad_info.gfx_level <= GFX8) va += plane->surface.u.legacy.color.dcc_level[iview->vk.base_mip_level].dcc_offset; unsigned dcc_tile_swizzle = tile_swizzle; @@ -1552,13 +1510,11 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff if (device->physical_device->rad_info.gfx_level >= GFX11) cb->cb_color_attrib |= S_028C74_NUM_FRAGMENTS_GFX11(log_samples); else - cb->cb_color_attrib |= - S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS_GFX6(log_samples); + cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS_GFX6(log_samples); } if (radv_image_has_fmask(iview->image)) { - va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset + - surf->fmask_offset; + va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset + surf->fmask_offset; cb->cb_color_fmask = va >> 8; cb->cb_color_fmask |= surf->fmask_tile_swizzle; } else { @@ -1573,15 +1529,13 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff endian = radv_colorformat_endian_swap(format); /* blend clamp should be set for all NORM/SRGB types */ - if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM || - ntype == V_028C70_NUMBER_SRGB) + if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM || ntype == V_028C70_NUMBER_SRGB) blend_clamp = 1; /* set blend bypass according to docs if SINT/UINT or 8/24 COLOR variants */ - if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || - format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 || - format == V_028C70_COLOR_X24_8_32_FLOAT) { + if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || format == 
V_028C70_COLOR_8_24 || + format == V_028C70_COLOR_24_8 || format == V_028C70_COLOR_X24_8_32_FLOAT) { blend_clamp = 0; blend_bypass = 1; } @@ -1592,13 +1546,12 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff format == V_028C70_COLOR_8_8_8_8)) ->color_is_int8 = true; #endif - cb->cb_color_info = - S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) | - S_028C70_BLEND_BYPASS(blend_bypass) | S_028C70_SIMPLE_FLOAT(1) | - S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM && - ntype != V_028C70_NUMBER_SRGB && format != V_028C70_COLOR_8_24 && - format != V_028C70_COLOR_24_8) | - S_028C70_NUMBER_TYPE(ntype); + cb->cb_color_info = S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) | + S_028C70_BLEND_BYPASS(blend_bypass) | S_028C70_SIMPLE_FLOAT(1) | + S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM && + ntype != V_028C70_NUMBER_SRGB && format != V_028C70_COLOR_8_24 && + format != V_028C70_COLOR_24_8) | + S_028C70_NUMBER_TYPE(ntype); if (device->physical_device->rad_info.gfx_level >= GFX11) cb->cb_color_info |= S_028C70_FORMAT_GFX11(format); @@ -1629,8 +1582,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff } } - if (radv_image_has_cmask(iview->image) && - !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)) + if (radv_image_has_cmask(iview->image) && !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)) cb->cb_color_info |= S_028C70_FAST_CLEAR(1); if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && !iview->disable_dcc_mrt && @@ -1646,13 +1598,10 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff } if (device->physical_device->rad_info.gfx_level >= GFX9) { - unsigned mip0_depth = iview->image->vk.image_type == VK_IMAGE_TYPE_3D - ? (iview->extent.depth - 1) - : (iview->image->vk.array_layers - 1); - unsigned width = - vk_format_get_plane_width(iview->image->vk.format, iview->plane_id, iview->extent.width); - unsigned height = - vk_format_get_plane_height(iview->image->vk.format, iview->plane_id, iview->extent.height); + unsigned mip0_depth = iview->image->vk.image_type == VK_IMAGE_TYPE_3D ? (iview->extent.depth - 1) + : (iview->image->vk.array_layers - 1); + unsigned width = vk_format_get_plane_width(iview->image->vk.format, iview->plane_id, iview->extent.width); + unsigned height = vk_format_get_plane_height(iview->image->vk.format, iview->plane_id, iview->extent.height); unsigned max_mip = iview->image->vk.mip_levels - 1; if (device->physical_device->rad_info.gfx_level >= GFX10) { @@ -1665,13 +1614,11 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(base_level); - cb->cb_color_attrib3 |= - S_028EE0_MIP0_DEPTH(mip0_depth) | S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) | - S_028EE0_RESOURCE_LEVEL(device->physical_device->rad_info.gfx_level >= GFX11 ? 0 : 1); + cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) | S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) | + S_028EE0_RESOURCE_LEVEL(device->physical_device->rad_info.gfx_level >= GFX11 ? 
0 : 1); } else { cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->vk.base_mip_level); - cb->cb_color_attrib |= - S_028C74_MIP0_DEPTH(mip0_depth) | S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type); + cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) | S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type); } /* GFX10.3+ can set a custom pitch for 1D and 2D non-array, but it must be a multiple @@ -1679,10 +1626,8 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff * * We set the pitch in MIP0_WIDTH. */ - if (device->physical_device->rad_info.gfx_level && - iview->image->vk.image_type == VK_IMAGE_TYPE_2D && - iview->image->vk.array_layers == 1 && - plane->surface.is_linear) { + if (device->physical_device->rad_info.gfx_level && iview->image->vk.image_type == VK_IMAGE_TYPE_2D && + iview->image->vk.array_layers == 1 && plane->surface.is_linear) { assert((plane->surface.u.gfx9.surf_pitch * plane->surface.bpe) % 256 == 0); width = plane->surface.u.gfx9.surf_pitch; @@ -1692,8 +1637,8 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff width *= 2; } - cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) | S_028C68_MIP0_HEIGHT(height - 1) | - S_028C68_MAX_MIP(max_mip); + cb->cb_color_attrib2 = + S_028C68_MIP0_WIDTH(width - 1) | S_028C68_MIP0_HEIGHT(height - 1) | S_028C68_MAX_MIP(max_mip); } } @@ -1713,8 +1658,7 @@ radv_calc_decompress_on_z_planes(const struct radv_device *device, struct radv_i /* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */ if (device->physical_device->rad_info.has_two_planes_iterate256_bug && - radv_image_get_iterate256(device, iview->image) && - !radv_image_tile_stencil_disabled(device, iview->image) && + radv_image_get_iterate256(device, iview->image) && !radv_image_tile_stencil_disabled(device, iview->image) && iview->image->vk.samples == 4) { max_zplanes = 1; } @@ -1743,8 +1687,7 @@ radv_calc_decompress_on_z_planes(const struct radv_device *device, struct radv_i } void -radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer, - struct radv_ds_buffer_info *ds) +radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer, struct radv_ds_buffer_info *ds) { const struct radeon_surf *surf = &image->planes[0].surface; @@ -1753,18 +1696,15 @@ radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_ ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16); - ds->db_z_info = S_028038_FORMAT(V_028040_Z_16) | - S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) | - S_028038_ZRANGE_PRECISION(1) | - S_028038_TILE_SURFACE_ENABLE(1); + ds->db_z_info = S_028038_FORMAT(V_028040_Z_16) | S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) | + S_028038_ZRANGE_PRECISION(1) | S_028038_TILE_SURFACE_ENABLE(1); ds->db_stencil_info = S_02803C_FORMAT(V_028044_STENCIL_INVALID); - ds->db_depth_size = S_02801C_X_MAX(image->vk.extent.width - 1) | - S_02801C_Y_MAX(image->vk.extent.height - 1); + ds->db_depth_size = S_02801C_X_MAX(image->vk.extent.width - 1) | S_02801C_Y_MAX(image->vk.extent.height - 1); ds->db_htile_data_base = radv_buffer_get_va(htile_buffer->bo) >> 8; - ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1) | - S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING); + ds->db_htile_surface = + S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1) | S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING); } void @@ -1805,11 +1745,10 @@ 
radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff stencil_format = surf->has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID; uint32_t max_slice = radv_surface_max_layer_count(iview) - 1; - ds->db_depth_view = S_028008_SLICE_START(iview->vk.base_array_layer) | - S_028008_SLICE_MAX(max_slice); + ds->db_depth_view = S_028008_SLICE_START(iview->vk.base_array_layer) | S_028008_SLICE_MAX(max_slice); if (device->physical_device->rad_info.gfx_level >= GFX10) { - ds->db_depth_view |= S_028008_SLICE_START_HI(iview->vk.base_array_layer >> 11) | - S_028008_SLICE_MAX_HI(max_slice >> 11); + ds->db_depth_view |= + S_028008_SLICE_START_HI(iview->vk.base_array_layer >> 11) | S_028008_SLICE_MAX_HI(max_slice >> 11); } ds->db_htile_data_base = 0; @@ -1826,14 +1765,11 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff assert(surf->u.gfx9.surf_offset == 0); s_offs += surf->u.gfx9.zs.stencil_offset; - ds->db_z_info = S_028038_FORMAT(format) | - S_028038_NUM_SAMPLES(util_logbase2(iview->image->vk.samples)) | - S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) | - S_028038_MAXMIP(iview->image->vk.mip_levels - 1) | + ds->db_z_info = S_028038_FORMAT(format) | S_028038_NUM_SAMPLES(util_logbase2(iview->image->vk.samples)) | + S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) | S_028038_MAXMIP(iview->image->vk.mip_levels - 1) | S_028038_ZRANGE_PRECISION(1) | S_028040_ITERATE_256(device->physical_device->rad_info.gfx_level >= GFX11); - ds->db_stencil_info = S_02803C_FORMAT(stencil_format) | - S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode) | + ds->db_stencil_info = S_02803C_FORMAT(stencil_format) | S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode) | S_028044_ITERATE_256(device->physical_device->rad_info.gfx_level >= GFX11); if (device->physical_device->rad_info.gfx_level == GFX9) { @@ -1842,8 +1778,8 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff } ds->db_depth_view |= S_028008_MIPID(level); - ds->db_depth_size = S_02801C_X_MAX(iview->image->vk.extent.width - 1) | - S_02801C_Y_MAX(iview->image->vk.extent.height - 1); + ds->db_depth_size = + S_02801C_X_MAX(iview->image->vk.extent.width - 1) | S_02801C_Y_MAX(iview->image->vk.extent.height - 1); if (radv_htile_enabled(iview->image, level)) { ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1); @@ -1870,8 +1806,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1); } - va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset + - surf->meta_offset; + va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset + surf->meta_offset; ds->db_htile_data_base = va >> 8; ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1); @@ -1932,10 +1867,9 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index); } - ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) | - S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1); - ds->db_depth_slice = - S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1); + ds->db_depth_size = + S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) | S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1); + ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1); if (radv_htile_enabled(iview->image, level)) { ds->db_z_info 
|= S_028040_TILE_SURFACE_ENABLE(1); @@ -1944,8 +1878,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1); } - va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset + - surf->meta_offset; + va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset + surf->meta_offset; ds->db_htile_data_base = va >> 8; ds->db_htile_surface = S_028ABC_FULL_CACHE(1); @@ -1963,8 +1896,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff } void -radv_gfx11_set_db_render_control(const struct radv_device *device, unsigned num_samples, - unsigned *db_render_control) +radv_gfx11_set_db_render_control(const struct radv_device *device, unsigned num_samples, unsigned *db_render_control) { const struct radv_physical_device *pdevice = device->physical_device; unsigned max_allowed_tiles_in_wave = 0; @@ -1987,8 +1919,8 @@ radv_gfx11_set_db_render_control(const struct radv_device *device, unsigned num_ max_allowed_tiles_in_wave = 15; } - *db_render_control |= S_028000_OREO_MODE(V_028000_OMODE_O_THEN_B) | - S_028000_MAX_ALLOWED_TILES_IN_WAVE(max_allowed_tiles_in_wave); + *db_render_control |= + S_028000_OREO_MODE(V_028000_OMODE_O_THEN_B) | S_028000_MAX_ALLOWED_TILES_IN_WAVE(max_allowed_tiles_in_wave); } VKAPI_ATTR VkResult VKAPI_CALL @@ -2010,17 +1942,15 @@ radv_GetMemoryFdKHR(VkDevice _device, const VkMemoryGetFdInfoKHR *pGetFdInfo, in } static uint32_t -radv_compute_valid_memory_types_attempt(struct radv_physical_device *dev, - enum radeon_bo_domain domains, enum radeon_bo_flag flags, - enum radeon_bo_flag ignore_flags) +radv_compute_valid_memory_types_attempt(struct radv_physical_device *dev, enum radeon_bo_domain domains, + enum radeon_bo_flag flags, enum radeon_bo_flag ignore_flags) { /* Don't count GTT/CPU as relevant: * * - We're not fully consistent between the two. * - Sometimes VRAM gets VRAM|GTT. 
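A host-side sketch of the domain matching that radv_compute_valid_memory_types_attempt() performs, only as an illustration and not driver code: the DOMAIN_* values and the three-entry type table are invented for the example, and the BO-flag filtering the real helper also applies (via ignore_flags) is left out.

#include <stdint.h>
#include <stdio.h>

enum {
   DOMAIN_GTT = 1u << 0,
   DOMAIN_VRAM = 1u << 1,
   DOMAIN_GDS = 1u << 2,
   DOMAIN_OA = 1u << 3,
};

/* A memory type is reported for an imported buffer only if it agrees with
 * the buffer's placement in the domains we care about (VRAM/GDS/OA);
 * GTT-vs-CPU differences are deliberately ignored. */
static uint32_t
valid_memory_types(uint32_t bo_domains, const uint32_t *type_domains, unsigned type_count)
{
   const uint32_t relevant = DOMAIN_VRAM | DOMAIN_GDS | DOMAIN_OA;
   uint32_t bits = 0;

   for (unsigned i = 0; i < type_count; ++i) {
      if ((bo_domains & relevant) == (type_domains[i] & relevant))
         bits |= 1u << i;
   }
   return bits;
}

int
main(void)
{
   /* Hypothetical device: type 0 = VRAM, type 1 = VRAM|GTT, type 2 = GTT. */
   const uint32_t types[] = {DOMAIN_VRAM, DOMAIN_VRAM | DOMAIN_GTT, DOMAIN_GTT};

   printf("VRAM bo -> 0x%x\n", (unsigned)valid_memory_types(DOMAIN_VRAM, types, 3)); /* 0x3 */
   printf("GTT bo  -> 0x%x\n", (unsigned)valid_memory_types(DOMAIN_GTT, types, 3));  /* 0x4 */
   return 0;
}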
*/ - const enum radeon_bo_domain relevant_domains = - RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA; + const enum radeon_bo_domain relevant_domains = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA; uint32_t bits = 0; for (unsigned i = 0; i < dev->memory_properties.memoryTypeCount; ++i) { if ((domains & relevant_domains) != (dev->memory_domains[i] & relevant_domains)) @@ -2058,8 +1988,8 @@ radv_compute_valid_memory_types(struct radv_physical_device *dev, enum radeon_bo return bits; } VKAPI_ATTR VkResult VKAPI_CALL -radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType, - int fd, VkMemoryFdPropertiesKHR *pMemoryFdProperties) +radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType, int fd, + VkMemoryFdPropertiesKHR *pMemoryFdProperties) { RADV_FROM_HANDLE(radv_device, device, _device); @@ -2070,8 +2000,7 @@ radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBi if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags)) return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); - pMemoryFdProperties->memoryTypeBits = - radv_compute_valid_memory_types(device->physical_device, domains, flags); + pMemoryFdProperties->memoryTypeBits = radv_compute_valid_memory_types(device->physical_device, domains, flags); return VK_SUCCESS; } default: @@ -2089,8 +2018,8 @@ radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBi #ifndef _WIN32 VKAPI_ATTR VkResult VKAPI_CALL radv_GetCalibratedTimestampsEXT(VkDevice _device, uint32_t timestampCount, - const VkCalibratedTimestampInfoEXT *pTimestampInfos, - uint64_t *pTimestamps, uint64_t *pMaxDeviation) + const VkCalibratedTimestampInfoEXT *pTimestampInfos, uint64_t *pTimestamps, + uint64_t *pMaxDeviation) { RADV_FROM_HANDLE(radv_device, device, _device); uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq; diff --git a/src/amd/vulkan/radv_device_generated_commands.c b/src/amd/vulkan/radv_device_generated_commands.c index bca45f7..038786a 100644 --- a/src/amd/vulkan/radv_device_generated_commands.c +++ b/src/amd/vulkan/radv_device_generated_commands.c @@ -29,9 +29,8 @@ #include "vk_common_entrypoints.h" static void -radv_get_sequence_size(const struct radv_indirect_command_layout *layout, - const struct radv_graphics_pipeline *pipeline, uint32_t *cmd_size, - uint32_t *upload_size) +radv_get_sequence_size(const struct radv_indirect_command_layout *layout, const struct radv_graphics_pipeline *pipeline, + uint32_t *cmd_size, uint32_t *upload_size) { const struct radv_device *device = container_of(layout->base.device, struct radv_device, vk); const struct radv_shader *vs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX); @@ -41,7 +40,7 @@ radv_get_sequence_size(const struct radv_indirect_command_layout *layout, if (layout->bind_vbo_mask) { *upload_size += 16 * util_bitcount(vs->info.vs.vb_desc_usage_mask); - /* One PKT3_SET_SH_REG for emitting VBO pointer (32-bit) */ + /* One PKT3_SET_SH_REG for emitting VBO pointer (32-bit) */ *cmd_size += 3 * 4; } @@ -63,8 +62,7 @@ radv_get_sequence_size(const struct radv_indirect_command_layout *layout, *cmd_size += (2 + locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].num_sgprs) * 4; } if (need_copy) - *upload_size += - align(pipeline->base.push_constant_size + 16 * pipeline->base.dynamic_offset_count, 16); + *upload_size += align(pipeline->base.push_constant_size + 16 * pipeline->base.dynamic_offset_count, 16); } if 
(layout->binds_index_buffer) { @@ -181,32 +179,28 @@ dgc_emit(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *value) { assert(value->bit_size >= 32); nir_ssa_def *offset = nir_load_var(b, cs->offset); - nir_store_ssbo(b, value, cs->descriptor, offset,.access = ACCESS_NON_READABLE); + nir_store_ssbo(b, value, cs->descriptor, offset, .access = ACCESS_NON_READABLE); nir_store_var(b, cs->offset, nir_iadd_imm(b, offset, value->num_components * value->bit_size / 8), 0x1); } +#define load_param32(b, field) \ + nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0), .base = offsetof(struct radv_dgc_params, field), .range = 4) -#define load_param32(b, field) \ - nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0), \ - .base = offsetof(struct radv_dgc_params, field), .range = 4) +#define load_param16(b, field) \ + nir_ubfe_imm((b), \ + nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0), \ + .base = (offsetof(struct radv_dgc_params, field) & ~3), .range = 4), \ + (offsetof(struct radv_dgc_params, field) & 2) * 8, 16) -#define load_param16(b, field) \ - nir_ubfe_imm( \ - (b), \ - nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0), \ - .base = (offsetof(struct radv_dgc_params, field) & ~3), .range = 4), \ - (offsetof(struct radv_dgc_params, field) & 2) * 8, 16) +#define load_param8(b, field) \ + nir_ubfe_imm((b), \ + nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0), \ + .base = (offsetof(struct radv_dgc_params, field) & ~3), .range = 4), \ + (offsetof(struct radv_dgc_params, field) & 3) * 8, 8) -#define load_param8(b, field) \ - nir_ubfe_imm( \ - (b), \ - nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0), \ - .base = (offsetof(struct radv_dgc_params, field) & ~3), .range = 4), \ - (offsetof(struct radv_dgc_params, field) & 3) * 8, 8) - -#define load_param64(b, field) \ - nir_pack_64_2x32((b), nir_load_push_constant((b), 2, 32, nir_imm_int((b), 0), \ - .base = offsetof(struct radv_dgc_params, field), .range = 8)) +#define load_param64(b, field) \ + nir_pack_64_2x32((b), nir_load_push_constant((b), 2, 32, nir_imm_int((b), 0), \ + .base = offsetof(struct radv_dgc_params, field), .range = 8)) static nir_ssa_def * nir_pkt3(nir_builder *b, unsigned op, nir_ssa_def *len) @@ -216,14 +210,12 @@ nir_pkt3(nir_builder *b, unsigned op, nir_ssa_def *len) } static void -dgc_emit_userdata_vertex(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *vtx_base_sgpr, - nir_ssa_def *first_vertex, nir_ssa_def *first_instance, nir_ssa_def *drawid) +dgc_emit_userdata_vertex(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *vtx_base_sgpr, nir_ssa_def *first_vertex, + nir_ssa_def *first_instance, nir_ssa_def *drawid) { vtx_base_sgpr = nir_u2u32(b, vtx_base_sgpr); - nir_ssa_def *has_drawid = - nir_test_mask(b, vtx_base_sgpr, DGC_USES_DRAWID); - nir_ssa_def *has_baseinstance = - nir_test_mask(b, vtx_base_sgpr, DGC_USES_BASEINSTANCE); + nir_ssa_def *has_drawid = nir_test_mask(b, vtx_base_sgpr, DGC_USES_DRAWID); + nir_ssa_def *has_baseinstance = nir_test_mask(b, vtx_base_sgpr, DGC_USES_BASEINSTANCE); nir_ssa_def *pkt_cnt = nir_imm_int(b, 1); pkt_cnt = nir_bcsel(b, has_drawid, nir_iadd_imm(b, pkt_cnt, 1), pkt_cnt); @@ -234,8 +226,8 @@ dgc_emit_userdata_vertex(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *vtx nir_imm_int(b, PKT3_NOP_PAD), nir_imm_int(b, PKT3_NOP_PAD), }; - values[3] = nir_bcsel(b, nir_ior(b, has_drawid, has_baseinstance), - nir_bcsel(b, has_drawid, drawid, first_instance), values[4]); + values[3] = nir_bcsel(b, nir_ior(b, has_drawid, has_baseinstance), nir_bcsel(b, has_drawid, drawid, 
first_instance), + values[4]); values[4] = nir_bcsel(b, nir_iand(b, has_drawid, has_baseinstance), first_instance, values[4]); dgc_emit(b, cs, nir_vec(b, values, 5)); @@ -250,12 +242,11 @@ dgc_emit_instance_count(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *inst } static void -dgc_emit_draw_index_offset_2(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *index_offset, - nir_ssa_def *index_count, nir_ssa_def *max_index_count) +dgc_emit_draw_index_offset_2(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *index_offset, nir_ssa_def *index_count, + nir_ssa_def *max_index_count) { - nir_ssa_def *values[5] = {nir_imm_int(b, PKT3(PKT3_DRAW_INDEX_OFFSET_2, 3, false)), - max_index_count, index_offset, index_count, - nir_imm_int(b, V_0287F0_DI_SRC_SEL_DMA)}; + nir_ssa_def *values[5] = {nir_imm_int(b, PKT3(PKT3_DRAW_INDEX_OFFSET_2, 3, false)), max_index_count, index_offset, + index_count, nir_imm_int(b, V_0287F0_DI_SRC_SEL_DMA)}; dgc_emit(b, cs, nir_vec(b, values, 5)); } @@ -281,8 +272,7 @@ build_dgc_buffer_tail(nir_builder *b, nir_ssa_def *sequence_count) { nir_ssa_def *cmd_buf_tail_start = nir_imul(b, cmd_buf_stride, sequence_count); - nir_variable *offset = - nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "offset"); + nir_variable *offset = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "offset"); nir_store_var(b, offset, cmd_buf_tail_start, 0x1); nir_ssa_def *dst_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PREPARE); @@ -316,8 +306,8 @@ build_dgc_buffer_tail(nir_builder *b, nir_ssa_def *sequence_count) * Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV. */ static void -dgc_emit_draw(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, - nir_ssa_def *stream_base, nir_ssa_def *draw_params_offset, nir_ssa_def *sequence_id) +dgc_emit_draw(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base, + nir_ssa_def *draw_params_offset, nir_ssa_def *sequence_id) { nir_ssa_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr); nir_ssa_def *stream_offset = nir_iadd(b, draw_params_offset, stream_base); @@ -341,16 +331,14 @@ dgc_emit_draw(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, * Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV. */ static void -dgc_emit_draw_indexed(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, - nir_ssa_def *stream_base, nir_ssa_def *draw_params_offset, - nir_ssa_def *sequence_id, nir_ssa_def *max_index_count) +dgc_emit_draw_indexed(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base, + nir_ssa_def *draw_params_offset, nir_ssa_def *sequence_id, nir_ssa_def *max_index_count) { nir_ssa_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr); nir_ssa_def *stream_offset = nir_iadd(b, draw_params_offset, stream_base); nir_ssa_def *draw_data0 = nir_load_ssbo(b, 4, 32, stream_buf, stream_offset); - nir_ssa_def *draw_data1 = - nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd_imm(b, stream_offset, 16)); + nir_ssa_def *draw_data1 = nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd_imm(b, stream_offset, 16)); nir_ssa_def *index_count = nir_channel(b, draw_data0, 0); nir_ssa_def *instance_count = nir_channel(b, draw_data0, 1); nir_ssa_def *first_index = nir_channel(b, draw_data0, 2); @@ -366,31 +354,23 @@ dgc_emit_draw_indexed(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream nir_pop_if(b, 0); } - /** * Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV. 
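A small sketch of the PM4 type-3 header that the PKT3() macro builds for packets like the PKT3_DRAW_INDEX_OFFSET_2 emitted above. The bit layout (packet type in [31:30], payload dword count minus one in [29:16], opcode in [15:8], predicate in bit 0) and the 0x35 opcode are assumptions based on the usual PM4 encoding, not taken from this patch.

#include <assert.h>
#include <stdint.h>

static uint32_t
pkt3_header(unsigned opcode, unsigned count_minus_one, int predicate)
{
   return (3u << 30) | (count_minus_one << 16) | (opcode << 8) | (predicate ? 1u : 0u);
}

int
main(void)
{
   /* dgc_emit_draw_index_offset_2() emits a five-dword packet: the header
    * plus max index count, index offset, index count and the DI source
    * select, hence PKT3(PKT3_DRAW_INDEX_OFFSET_2, 3, false) with 3 = 4 - 1. */
   uint32_t header = pkt3_header(0x35 /* assumed DRAW_INDEX_OFFSET_2 opcode */, 3, 0);

   assert((header >> 30) == 3);            /* type-3 packet */
   assert(((header >> 16) & 0x3fff) == 3); /* four payload dwords */
   assert(((header >> 8) & 0xff) == 0x35); /* opcode */
   return 0;
}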
*/ static void -dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, - nir_ssa_def *stream_base, nir_ssa_def *index_buffer_offset, - nir_ssa_def *ibo_type_32, nir_ssa_def *ibo_type_8, - nir_variable *index_size_var, nir_variable *max_index_count_var, - const struct radv_device *device) +dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base, + nir_ssa_def *index_buffer_offset, nir_ssa_def *ibo_type_32, nir_ssa_def *ibo_type_8, + nir_variable *index_size_var, nir_variable *max_index_count_var, const struct radv_device *device) { - nir_ssa_def *index_stream_offset = - nir_iadd(b, index_buffer_offset, stream_base); - nir_ssa_def *data = - nir_load_ssbo(b, 4, 32, stream_buf, index_stream_offset); + nir_ssa_def *index_stream_offset = nir_iadd(b, index_buffer_offset, stream_base); + nir_ssa_def *data = nir_load_ssbo(b, 4, 32, stream_buf, index_stream_offset); nir_ssa_def *vk_index_type = nir_channel(b, data, 3); - nir_ssa_def *index_type = nir_bcsel( - b, nir_ieq(b, vk_index_type, ibo_type_32), - nir_imm_int(b, V_028A7C_VGT_INDEX_32), nir_imm_int(b, V_028A7C_VGT_INDEX_16)); - index_type = nir_bcsel(b, nir_ieq(b, vk_index_type, ibo_type_8), - nir_imm_int(b, V_028A7C_VGT_INDEX_8), index_type); - - nir_ssa_def *index_size = nir_iand_imm( - b, nir_ushr(b, nir_imm_int(b, 0x142), nir_imul_imm(b, index_type, 4)), 0xf); + nir_ssa_def *index_type = nir_bcsel(b, nir_ieq(b, vk_index_type, ibo_type_32), nir_imm_int(b, V_028A7C_VGT_INDEX_32), + nir_imm_int(b, V_028A7C_VGT_INDEX_16)); + index_type = nir_bcsel(b, nir_ieq(b, vk_index_type, ibo_type_8), nir_imm_int(b, V_028A7C_VGT_INDEX_8), index_type); + + nir_ssa_def *index_size = nir_iand_imm(b, nir_ushr(b, nir_imm_int(b, 0x142), nir_imul_imm(b, index_type, 4)), 0xf); nir_store_var(b, index_size_var, index_size, 0x1); nir_ssa_def *max_index_count = nir_udiv(b, nir_channel(b, data, 2), index_size); @@ -401,12 +381,10 @@ dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream if (device->physical_device->rad_info.gfx_level >= GFX9) { unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX; if (device->physical_device->rad_info.gfx_level < GFX9 || - (device->physical_device->rad_info.gfx_level == GFX9 && - device->physical_device->rad_info.me_fw_version < 26)) + (device->physical_device->rad_info.gfx_level == GFX9 && device->physical_device->rad_info.me_fw_version < 26)) opcode = PKT3_SET_UCONFIG_REG; cmd_values[0] = nir_imm_int(b, PKT3(opcode, 1, 0)); - cmd_values[1] = nir_imm_int( - b, (R_03090C_VGT_INDEX_TYPE - CIK_UCONFIG_REG_OFFSET) >> 2 | (2u << 28)); + cmd_values[1] = nir_imm_int(b, (R_03090C_VGT_INDEX_TYPE - CIK_UCONFIG_REG_OFFSET) >> 2 | (2u << 28)); cmd_values[2] = index_type; } else { cmd_values[0] = nir_imm_int(b, PKT3(PKT3_INDEX_TYPE, 0, 0)); @@ -430,19 +408,17 @@ dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream * Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_STATE_FLAGS_NV. 
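The 0x142 constant in dgc_emit_index_buffer() above is a tiny lookup table: the index size in bytes for each VGT index type is packed into one nibble, so shifting by 4 * index_type and masking recovers it. A standalone version of the same arithmetic, assuming the usual register encodings VGT_INDEX_16 = 0, VGT_INDEX_32 = 1, VGT_INDEX_8 = 2 (those values are not spelled out in this patch):

#include <assert.h>

enum vgt_index_type { VGT_INDEX_16 = 0, VGT_INDEX_32 = 1, VGT_INDEX_8 = 2 };

static unsigned
index_size_bytes(enum vgt_index_type type)
{
   /* nibble 0 = 2 bytes, nibble 1 = 4 bytes, nibble 2 = 1 byte -> 0x142 */
   return (0x142u >> (type * 4)) & 0xf;
}

int
main(void)
{
   assert(index_size_bytes(VGT_INDEX_16) == 2);
   assert(index_size_bytes(VGT_INDEX_32) == 4);
   assert(index_size_bytes(VGT_INDEX_8) == 1);
   return 0;
}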
*/ static void -dgc_emit_state(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, - nir_ssa_def *stream_base, nir_ssa_def *state_offset) +dgc_emit_state(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base, + nir_ssa_def *state_offset) { nir_ssa_def *stream_offset = nir_iadd(b, state_offset, stream_base); nir_ssa_def *state = nir_load_ssbo(b, 1, 32, stream_buf, stream_offset); state = nir_iand_imm(b, state, 1); - nir_ssa_def *reg = - nir_ior(b, load_param32(b, pa_su_sc_mode_cntl_base), nir_ishl_imm(b, state, 2)); + nir_ssa_def *reg = nir_ior(b, load_param32(b, pa_su_sc_mode_cntl_base), nir_ishl_imm(b, state, 2)); - nir_ssa_def *cmd_values[3] = { - nir_imm_int(b, PKT3(PKT3_SET_CONTEXT_REG, 1, 0)), - nir_imm_int(b, (R_028814_PA_SU_SC_MODE_CNTL - SI_CONTEXT_REG_OFFSET) >> 2), reg}; + nir_ssa_def *cmd_values[3] = {nir_imm_int(b, PKT3(PKT3_SET_CONTEXT_REG, 1, 0)), + nir_imm_int(b, (R_028814_PA_SU_SC_MODE_CNTL - SI_CONTEXT_REG_OFFSET) >> 2), reg}; dgc_emit(b, cs, nir_vec(b, cmd_values, 3)); @@ -450,8 +426,7 @@ dgc_emit_state(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_push_if(b, nir_ine_imm(b, scissor_count, 0)); { nir_ssa_def *scissor_offset = load_param16(b, scissor_offset); - nir_variable *idx = - nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "scissor_copy_idx"); + nir_variable *idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "scissor_copy_idx"); nir_store_var(b, idx, nir_imm_int(b, 0), 1); nir_push_loop(b); @@ -480,9 +455,8 @@ dgc_emit_state(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, * Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV. */ static void -dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, - nir_ssa_def *stream_base, nir_ssa_def *push_const_mask, - nir_variable *upload_offset) +dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base, + nir_ssa_def *push_const_mask, nir_variable *upload_offset) { nir_ssa_def *vbo_cnt = load_param8(b, vbo_cnt); nir_ssa_def *const_copy = nir_ine_imm(b, load_param8(b, const_copy), 0); @@ -490,8 +464,7 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea nir_ssa_def *const_copy_words = nir_ushr_imm(b, const_copy_size, 2); const_copy_words = nir_bcsel(b, const_copy, const_copy_words, nir_imm_int(b, 0)); - nir_variable *idx = - nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "const_copy_idx"); + nir_variable *idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "const_copy_idx"); nir_store_var(b, idx, nir_imm_int(b, 0), 0x1); nir_ssa_def *param_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PARAMS); @@ -508,26 +481,22 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea } nir_pop_if(b, NULL); - nir_variable *data = - nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "copy_data"); + nir_variable *data = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "copy_data"); nir_ssa_def *update = nir_iand(b, push_const_mask, nir_ishl(b, nir_imm_int64(b, 1), cur_idx)); - update = nir_bcsel(b, nir_ult_imm(b, cur_idx, 64 /* bits in push_const_mask */), update, - nir_imm_int64(b, 0)); + update = nir_bcsel(b, nir_ult_imm(b, cur_idx, 64 /* bits in push_const_mask */), update, nir_imm_int64(b, 0)); nir_push_if(b, nir_ine_imm(b, update, 0)); { - nir_ssa_def *stream_offset = nir_load_ssbo( 
- b, 1, 32, param_buf, nir_iadd(b, param_offset_offset, nir_ishl_imm(b, cur_idx, 2))); - nir_ssa_def *new_data = - nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd(b, stream_base, stream_offset)); + nir_ssa_def *stream_offset = + nir_load_ssbo(b, 1, 32, param_buf, nir_iadd(b, param_offset_offset, nir_ishl_imm(b, cur_idx, 2))); + nir_ssa_def *new_data = nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd(b, stream_base, stream_offset)); nir_store_var(b, data, new_data, 0x1); } nir_push_else(b, NULL); { nir_store_var(b, data, - nir_load_ssbo(b, 1, 32, param_buf, - nir_iadd(b, param_const_offset, nir_ishl_imm(b, cur_idx, 2))), + nir_load_ssbo(b, 1, 32, param_buf, nir_iadd(b, param_const_offset, nir_ishl_imm(b, cur_idx, 2))), 0x1); } nir_pop_if(b, NULL); @@ -540,8 +509,7 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea } nir_pop_loop(b, NULL); - nir_variable *shader_idx = - nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "shader_idx"); + nir_variable *shader_idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "shader_idx"); nir_store_var(b, shader_idx, nir_imm_int(b, 0), 0x1); nir_ssa_def *shader_cnt = load_param16(b, push_constant_shader_cnt); @@ -554,17 +522,16 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea } nir_pop_if(b, NULL); - nir_ssa_def *reg_info = nir_load_ssbo( - b, 3, 32, param_buf, nir_iadd(b, param_offset, nir_imul_imm(b, cur_shader_idx, 12))); + nir_ssa_def *reg_info = + nir_load_ssbo(b, 3, 32, param_buf, nir_iadd(b, param_offset, nir_imul_imm(b, cur_shader_idx, 12))); nir_ssa_def *upload_sgpr = nir_ubfe_imm(b, nir_channel(b, reg_info, 0), 0, 16); nir_ssa_def *inline_sgpr = nir_ubfe_imm(b, nir_channel(b, reg_info, 0), 16, 16); nir_ssa_def *inline_mask = nir_pack_64_2x32(b, nir_channels(b, reg_info, 0x6)); nir_push_if(b, nir_ine_imm(b, upload_sgpr, 0)); { - nir_ssa_def *pkt[3] = { - nir_imm_int(b, PKT3(PKT3_SET_SH_REG, 1, 0)), upload_sgpr, - nir_iadd(b, load_param32(b, upload_addr), nir_load_var(b, upload_offset))}; + nir_ssa_def *pkt[3] = {nir_imm_int(b, PKT3(PKT3_SET_SH_REG, 1, 0)), upload_sgpr, + nir_iadd(b, load_param32(b, upload_addr), nir_load_var(b, upload_offset))}; dgc_emit(b, cs, nir_vec(b, pkt, 3)); } @@ -596,29 +563,24 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea } nir_pop_if(b, NULL); - nir_variable *data = - nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "copy_data"); + nir_variable *data = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "copy_data"); - nir_ssa_def *update = - nir_iand(b, push_const_mask, nir_ishl(b, nir_imm_int64(b, 1), cur_idx)); - update = nir_bcsel(b, nir_ult_imm(b, cur_idx, 64 /* bits in push_const_mask */), update, - nir_imm_int64(b, 0)); + nir_ssa_def *update = nir_iand(b, push_const_mask, nir_ishl(b, nir_imm_int64(b, 1), cur_idx)); + update = + nir_bcsel(b, nir_ult_imm(b, cur_idx, 64 /* bits in push_const_mask */), update, nir_imm_int64(b, 0)); nir_push_if(b, nir_ine_imm(b, update, 0)); { nir_ssa_def *stream_offset = - nir_load_ssbo(b, 1, 32, param_buf, - nir_iadd(b, param_offset_offset, nir_ishl_imm(b, cur_idx, 2))); - nir_ssa_def *new_data = - nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd(b, stream_base, stream_offset)); + nir_load_ssbo(b, 1, 32, param_buf, nir_iadd(b, param_offset_offset, nir_ishl_imm(b, cur_idx, 2))); + nir_ssa_def *new_data = nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd(b, stream_base, stream_offset)); nir_store_var(b, data, 
new_data, 0x1); } nir_push_else(b, NULL); { nir_store_var( b, data, - nir_load_ssbo(b, 1, 32, param_buf, - nir_iadd(b, param_const_offset, nir_ishl_imm(b, cur_idx, 2))), + nir_load_ssbo(b, 1, 32, param_buf, nir_iadd(b, param_const_offset, nir_ishl_imm(b, cur_idx, 2))), 0x1); } nir_pop_if(b, NULL); @@ -639,13 +601,11 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea * For emitting VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV. */ static void -dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, - nir_ssa_def *stream_base, nir_ssa_def *vbo_bind_mask, - nir_variable *upload_offset, const struct radv_device *device) +dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base, + nir_ssa_def *vbo_bind_mask, nir_variable *upload_offset, const struct radv_device *device) { nir_ssa_def *vbo_cnt = load_param8(b, vbo_cnt); - nir_variable *vbo_idx = - nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "vbo_idx"); + nir_variable *vbo_idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "vbo_idx"); nir_store_var(b, vbo_idx, nir_imm_int(b, 0), 0x1); nir_push_loop(b); @@ -657,15 +617,13 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea nir_pop_if(b, NULL); nir_ssa_def *vbo_offset = nir_imul_imm(b, nir_load_var(b, vbo_idx), 16); - nir_variable *vbo_data = - nir_variable_create(b->shader, nir_var_shader_temp, glsl_uvec4_type(), "vbo_data"); + nir_variable *vbo_data = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uvec4_type(), "vbo_data"); nir_ssa_def *param_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PARAMS); nir_store_var(b, vbo_data, nir_load_ssbo(b, 4, 32, param_buf, vbo_offset), 0xf); - nir_ssa_def *vbo_override = nir_ine_imm( - b, nir_iand(b, vbo_bind_mask, nir_ishl(b, nir_imm_int(b, 1), nir_load_var(b, vbo_idx))), - 0); + nir_ssa_def *vbo_override = + nir_ine_imm(b, nir_iand(b, vbo_bind_mask, nir_ishl(b, nir_imm_int(b, 1), nir_load_var(b, vbo_idx))), 0); nir_push_if(b, vbo_override); { nir_ssa_def *vbo_offset_offset = @@ -679,14 +637,11 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea nir_ssa_def *size = nir_channel(b, stream_data, 2); nir_ssa_def *stride = nir_channel(b, stream_data, 3); - nir_ssa_def *dyn_stride = - nir_test_mask(b, nir_channel(b, vbo_over_data, 0), DGC_DYNAMIC_STRIDE); - nir_ssa_def *old_stride = - nir_ubfe_imm(b, nir_channel(b, nir_load_var(b, vbo_data), 1), 16, 14); + nir_ssa_def *dyn_stride = nir_test_mask(b, nir_channel(b, vbo_over_data, 0), DGC_DYNAMIC_STRIDE); + nir_ssa_def *old_stride = nir_ubfe_imm(b, nir_channel(b, nir_load_var(b, vbo_data), 1), 16, 14); stride = nir_bcsel(b, dyn_stride, stride, old_stride); - nir_ssa_def *use_per_attribute_vb_descs = - nir_test_mask(b, nir_channel(b, vbo_over_data, 0), 1u << 31); + nir_ssa_def *use_per_attribute_vb_descs = nir_test_mask(b, nir_channel(b, vbo_over_data, 0), 1u << 31); nir_variable *num_records = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "num_records"); nir_store_var(b, num_records, size, 0x1); @@ -694,8 +649,7 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea nir_push_if(b, use_per_attribute_vb_descs); { nir_ssa_def *attrib_end = nir_ubfe_imm(b, nir_channel(b, vbo_over_data, 1), 16, 16); - nir_ssa_def *attrib_index_offset = - nir_ubfe_imm(b, nir_channel(b, vbo_over_data, 1), 0, 16); + nir_ssa_def *attrib_index_offset 
= nir_ubfe_imm(b, nir_channel(b, vbo_over_data, 1), 0, 16); nir_push_if(b, nir_ult(b, nir_load_var(b, num_records), attrib_end)); { @@ -709,10 +663,7 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea nir_push_else(b, NULL); { nir_ssa_def *r = nir_iadd( - b, - nir_iadd_imm( - b, nir_udiv(b, nir_isub(b, nir_load_var(b, num_records), attrib_end), stride), - 1), + b, nir_iadd_imm(b, nir_udiv(b, nir_isub(b, nir_load_var(b, num_records), attrib_end), stride), 1), attrib_index_offset); nir_store_var(b, num_records, r, 0x1); } @@ -726,8 +677,7 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea convert_cond = nir_iand(b, convert_cond, nir_ieq_imm(b, stride, 0)); nir_ssa_def *new_records = - nir_iadd(b, nir_imul(b, nir_iadd_imm(b, nir_load_var(b, num_records), -1), stride), - attrib_end); + nir_iadd(b, nir_imul(b, nir_iadd_imm(b, nir_load_var(b, num_records), -1), stride), attrib_end); new_records = nir_bcsel(b, convert_cond, new_records, nir_load_var(b, num_records)); nir_store_var(b, num_records, new_records, 0x1); } @@ -736,8 +686,7 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea if (device->physical_device->rad_info.gfx_level != GFX8) { nir_push_if(b, nir_ine_imm(b, stride, 0)); { - nir_ssa_def *r = - nir_iadd(b, nir_load_var(b, num_records), nir_iadd_imm(b, stride, -1)); + nir_ssa_def *r = nir_iadd(b, nir_load_var(b, num_records), nir_iadd_imm(b, stride, -1)); nir_store_var(b, num_records, nir_udiv(b, r, stride), 0x1); } nir_pop_if(b, NULL); @@ -747,9 +696,8 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea nir_ssa_def *rsrc_word3 = nir_channel(b, nir_load_var(b, vbo_data), 3); if (device->physical_device->rad_info.gfx_level >= GFX10) { - nir_ssa_def *oob_select = - nir_bcsel(b, nir_ieq_imm(b, stride, 0), nir_imm_int(b, V_008F0C_OOB_SELECT_RAW), - nir_imm_int(b, V_008F0C_OOB_SELECT_STRUCTURED)); + nir_ssa_def *oob_select = nir_bcsel(b, nir_ieq_imm(b, stride, 0), nir_imm_int(b, V_008F0C_OOB_SELECT_RAW), + nir_imm_int(b, V_008F0C_OOB_SELECT_STRUCTURED)); rsrc_word3 = nir_iand_imm(b, rsrc_word3, C_008F0C_OOB_SELECT); rsrc_word3 = nir_ior(b, rsrc_word3, nir_ishl_imm(b, oob_select, 28)); } @@ -757,8 +705,8 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea nir_ssa_def *va_hi = nir_iand_imm(b, nir_unpack_64_2x32_split_y(b, va), 0xFFFF); stride = nir_iand_imm(b, stride, 0x3FFF); nir_ssa_def *new_vbo_data[4] = {nir_unpack_64_2x32_split_x(b, va), - nir_ior(b, nir_ishl_imm(b, stride, 16), va_hi), - nir_load_var(b, num_records), rsrc_word3}; + nir_ior(b, nir_ishl_imm(b, stride, 16), va_hi), nir_load_var(b, num_records), + rsrc_word3}; nir_store_var(b, vbo_data, nir_vec(b, new_vbo_data, 4), 0xf); } nir_pop_if(b, NULL); @@ -769,30 +717,25 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea */ nir_ssa_def *num_records = nir_channel(b, nir_load_var(b, vbo_data), 2); nir_ssa_def *buf_va = - nir_iand_imm(b, nir_pack_64_2x32(b, nir_trim_vector(b, nir_load_var(b, vbo_data), 2)), - (1ull << 48) - 1ull); + nir_iand_imm(b, nir_pack_64_2x32(b, nir_trim_vector(b, nir_load_var(b, vbo_data), 2)), (1ull << 48) - 1ull); nir_push_if(b, nir_ior(b, nir_ieq_imm(b, num_records, 0), nir_ieq_imm(b, buf_va, 0))); { - nir_ssa_def *new_vbo_data[4] = {nir_imm_int(b, 0), nir_imm_int(b, 0), nir_imm_int(b, 0), - nir_imm_int(b, 0)}; + nir_ssa_def *new_vbo_data[4] = {nir_imm_int(b, 0), nir_imm_int(b, 0), nir_imm_int(b, 0), nir_imm_int(b, 
0)}; nir_store_var(b, vbo_data, nir_vec(b, new_vbo_data, 4), 0xf); } nir_pop_if(b, NULL); nir_ssa_def *upload_off = nir_iadd(b, nir_load_var(b, upload_offset), vbo_offset); - nir_store_ssbo(b, nir_load_var(b, vbo_data), cs->descriptor, upload_off, - .access = ACCESS_NON_READABLE); + nir_store_ssbo(b, nir_load_var(b, vbo_data), cs->descriptor, upload_off, .access = ACCESS_NON_READABLE); nir_store_var(b, vbo_idx, nir_iadd_imm(b, nir_load_var(b, vbo_idx), 1), 0x1); } nir_pop_loop(b, NULL); - nir_ssa_def *packet[3] = { - nir_imm_int(b, PKT3(PKT3_SET_SH_REG, 1, 0)), load_param16(b, vbo_reg), - nir_iadd(b, load_param32(b, upload_addr), nir_load_var(b, upload_offset))}; + nir_ssa_def *packet[3] = {nir_imm_int(b, PKT3(PKT3_SET_SH_REG, 1, 0)), load_param16(b, vbo_reg), + nir_iadd(b, load_param32(b, upload_addr), nir_load_var(b, upload_offset))}; dgc_emit(b, cs, nir_vec(b, packet, 3)); - nir_store_var(b, upload_offset, - nir_iadd(b, nir_load_var(b, upload_offset), nir_imul_imm(b, vbo_cnt, 16)), 0x1); + nir_store_var(b, upload_offset, nir_iadd(b, nir_load_var(b, upload_offset), nir_imul_imm(b, vbo_cnt, 16)), 0x1); } static nir_shader * @@ -846,25 +789,21 @@ build_dgc_prepare_shader(struct radv_device *dev) nir_variable *upload_offset = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "upload_offset"); - nir_store_var(&b, upload_offset, - nir_iadd(&b, load_param32(&b, cmd_buf_size), - nir_imul(&b, load_param32(&b, upload_stride), sequence_id)), - 0x1); + nir_store_var( + &b, upload_offset, + nir_iadd(&b, load_param32(&b, cmd_buf_size), nir_imul(&b, load_param32(&b, upload_stride), sequence_id)), 0x1); nir_ssa_def *vbo_bind_mask = load_param32(&b, vbo_bind_mask); nir_push_if(&b, nir_ine_imm(&b, vbo_bind_mask, 0)); { - dgc_emit_vertex_buffer(&b, &cmd_buf, stream_buf, stream_base, vbo_bind_mask, upload_offset, - dev); + dgc_emit_vertex_buffer(&b, &cmd_buf, stream_buf, stream_base, vbo_bind_mask, upload_offset, dev); } nir_pop_if(&b, NULL); - nir_ssa_def *push_const_mask = load_param64(&b, push_constant_mask); nir_push_if(&b, nir_ine_imm(&b, push_const_mask, 0)); { - dgc_emit_push_constant(&b, &cmd_buf, stream_buf, stream_base, push_const_mask, - upload_offset); + dgc_emit_push_constant(&b, &cmd_buf, stream_buf, stream_base, push_const_mask, upload_offset); } nir_pop_if(&b, 0); @@ -876,8 +815,7 @@ build_dgc_prepare_shader(struct radv_device *dev) nir_push_if(&b, nir_ieq_imm(&b, load_param16(&b, draw_indexed), 0)); { - dgc_emit_draw(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, draw_params_offset), - sequence_id); + dgc_emit_draw(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, draw_params_offset), sequence_id); } nir_push_else(&b, NULL); { @@ -891,23 +829,20 @@ build_dgc_prepare_shader(struct radv_device *dev) nir_ssa_def *bind_index_buffer = nir_ieq_imm(&b, nir_load_var(&b, index_size_var), 0); nir_push_if(&b, bind_index_buffer); { - dgc_emit_index_buffer(&b, &cmd_buf, stream_buf, stream_base, - load_param16(&b, index_buffer_offset), - load_param32(&b, ibo_type_32), load_param32(&b, ibo_type_8), - index_size_var, max_index_count_var, dev); + dgc_emit_index_buffer(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, index_buffer_offset), + load_param32(&b, ibo_type_32), load_param32(&b, ibo_type_8), index_size_var, + max_index_count_var, dev); } nir_pop_if(&b, NULL); nir_ssa_def *index_size = nir_load_var(&b, index_size_var); nir_ssa_def *max_index_count = nir_load_var(&b, max_index_count_var); - index_size = - nir_bcsel(&b, bind_index_buffer, nir_load_var(&b, 
index_size_var), index_size); - max_index_count = nir_bcsel(&b, bind_index_buffer, nir_load_var(&b, max_index_count_var), - max_index_count); + index_size = nir_bcsel(&b, bind_index_buffer, nir_load_var(&b, index_size_var), index_size); + max_index_count = nir_bcsel(&b, bind_index_buffer, nir_load_var(&b, max_index_count_var), max_index_count); - dgc_emit_draw_indexed(&b, &cmd_buf, stream_buf, stream_base, - load_param16(&b, draw_params_offset), sequence_id, max_index_count); + dgc_emit_draw_indexed(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, draw_params_offset), sequence_id, + max_index_count); } nir_pop_if(&b, NULL); @@ -934,11 +869,10 @@ radv_device_finish_dgc_prepare_state(struct radv_device *device) { radv_DestroyPipeline(radv_device_to_handle(device), device->meta_state.dgc_prepare.pipeline, &device->meta_state.alloc); - radv_DestroyPipelineLayout(radv_device_to_handle(device), - device->meta_state.dgc_prepare.p_layout, &device->meta_state.alloc); - device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), - device->meta_state.dgc_prepare.ds_layout, - &device->meta_state.alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), device->meta_state.dgc_prepare.p_layout, + &device->meta_state.alloc); + device->vk.dispatch_table.DestroyDescriptorSetLayout( + radv_device_to_handle(device), device->meta_state.dgc_prepare.ds_layout, &device->meta_state.alloc); } VkResult @@ -947,35 +881,33 @@ radv_device_init_dgc_prepare_state(struct radv_device *device) VkResult result; nir_shader *cs = build_dgc_prepare_shader(device); - VkDescriptorSetLayoutCreateInfo ds_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = DGC_NUM_DESCS, - .pBindings = (VkDescriptorSetLayoutBinding[]){ - {.binding = DGC_DESC_STREAM, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - {.binding = DGC_DESC_PREPARE, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - {.binding = DGC_DESC_PARAMS, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - {.binding = DGC_DESC_COUNT, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = NULL}, - }}; - - result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, - &device->meta_state.alloc, + VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = DGC_NUM_DESCS, + .pBindings = (VkDescriptorSetLayoutBinding[]){ + {.binding = DGC_DESC_STREAM, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + {.binding = DGC_DESC_PREPARE, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + {.binding = DGC_DESC_PARAMS, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, 
+ {.binding = DGC_DESC_COUNT, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL}, + }}; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc, &device->meta_state.dgc_prepare.ds_layout); if (result != VK_SUCCESS) goto cleanup; @@ -985,12 +917,10 @@ radv_device_init_dgc_prepare_state(struct radv_device *device) .setLayoutCount = 1, .pSetLayouts = &device->meta_state.dgc_prepare.ds_layout, .pushConstantRangeCount = 1, - .pPushConstantRanges = - &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct radv_dgc_params)}, + .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct radv_dgc_params)}, }; - result = radv_CreatePipelineLayout(radv_device_to_handle(device), &leaf_pl_create_info, - &device->meta_state.alloc, + result = radv_CreatePipelineLayout(radv_device_to_handle(device), &leaf_pl_create_info, &device->meta_state.alloc, &device->meta_state.dgc_prepare.p_layout); if (result != VK_SUCCESS) goto cleanup; @@ -1010,9 +940,8 @@ radv_device_init_dgc_prepare_state(struct radv_device *device) .layout = device->meta_state.dgc_prepare.p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &pipeline_info, &device->meta_state.alloc, - &device->meta_state.dgc_prepare.pipeline); + result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &pipeline_info, + &device->meta_state.alloc, &device->meta_state.dgc_prepare.pipeline); if (result != VK_SUCCESS) goto cleanup; @@ -1022,20 +951,17 @@ cleanup: } VKAPI_ATTR VkResult VKAPI_CALL -radv_CreateIndirectCommandsLayoutNV(VkDevice _device, - const VkIndirectCommandsLayoutCreateInfoNV *pCreateInfo, +radv_CreateIndirectCommandsLayoutNV(VkDevice _device, const VkIndirectCommandsLayoutCreateInfoNV *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkIndirectCommandsLayoutNV *pIndirectCommandsLayout) { RADV_FROM_HANDLE(radv_device, device, _device); struct radv_indirect_command_layout *layout; - size_t size = - sizeof(*layout) + pCreateInfo->tokenCount * sizeof(VkIndirectCommandsLayoutTokenNV); + size_t size = sizeof(*layout) + pCreateInfo->tokenCount * sizeof(VkIndirectCommandsLayoutTokenNV); - layout = - vk_zalloc2(&device->vk.alloc, pAllocator, size, alignof(struct radv_indirect_command_layout), - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + layout = vk_zalloc2(&device->vk.alloc, pAllocator, size, alignof(struct radv_indirect_command_layout), + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!layout) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -1070,8 +996,7 @@ radv_CreateIndirectCommandsLayoutNV(VkDevice _device, break; case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV: layout->bind_vbo_mask |= 1u << pCreateInfo->pTokens[i].vertexBindingUnit; - layout->vbo_offsets[pCreateInfo->pTokens[i].vertexBindingUnit] = - pCreateInfo->pTokens[i].offset; + layout->vbo_offsets[pCreateInfo->pTokens[i].vertexBindingUnit] = pCreateInfo->pTokens[i].offset; if (pCreateInfo->pTokens[i].vertexDynamicStride) layout->vbo_offsets[pCreateInfo->pTokens[i].vertexBindingUnit] |= DGC_DYNAMIC_STRIDE; break; @@ -1098,8 +1023,7 @@ radv_CreateIndirectCommandsLayoutNV(VkDevice _device, } VKAPI_ATTR void VKAPI_CALL -radv_DestroyIndirectCommandsLayoutNV(VkDevice _device, - VkIndirectCommandsLayoutNV indirectCommandsLayout, +radv_DestroyIndirectCommandsLayoutNV(VkDevice _device, 
VkIndirectCommandsLayoutNV indirectCommandsLayout, const VkAllocationCallbacks *pAllocator) { RADV_FROM_HANDLE(radv_device, device, _device); @@ -1113,9 +1037,9 @@ radv_DestroyIndirectCommandsLayoutNV(VkDevice _device, } VKAPI_ATTR void VKAPI_CALL -radv_GetGeneratedCommandsMemoryRequirementsNV( - VkDevice _device, const VkGeneratedCommandsMemoryRequirementsInfoNV *pInfo, - VkMemoryRequirements2 *pMemoryRequirements) +radv_GetGeneratedCommandsMemoryRequirementsNV(VkDevice _device, + const VkGeneratedCommandsMemoryRequirementsInfoNV *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) { RADV_FROM_HANDLE(radv_device, device, _device); VK_FROM_HANDLE(radv_indirect_command_layout, layout, pInfo->indirectCommandsLayout); @@ -1128,8 +1052,7 @@ radv_GetGeneratedCommandsMemoryRequirementsNV( VkDeviceSize cmd_buf_size = radv_align_cmdbuf_size(cmd_stride * pInfo->maxSequencesCount); VkDeviceSize upload_buf_size = upload_stride * pInfo->maxSequencesCount; - pMemoryRequirements->memoryRequirements.memoryTypeBits = - device->physical_device->memory_types_32bit; + pMemoryRequirements->memoryRequirements.memoryTypeBits = device->physical_device->memory_types_32bit; pMemoryRequirements->memoryRequirements.alignment = 256; pMemoryRequirements->memoryRequirements.size = align(cmd_buf_size + upload_buf_size, pMemoryRequirements->memoryRequirements.alignment); @@ -1145,11 +1068,9 @@ radv_CmdPreprocessGeneratedCommandsNV(VkCommandBuffer commandBuffer, /* Always need to call this directly before draw due to dependence on bound state. */ void -radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, - const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo) +radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo) { - VK_FROM_HANDLE(radv_indirect_command_layout, layout, - pGeneratedCommandsInfo->indirectCommandsLayout); + VK_FROM_HANDLE(radv_indirect_command_layout, layout, pGeneratedCommandsInfo->indirectCommandsLayout); VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline); VK_FROM_HANDLE(radv_buffer, prep_buffer, pGeneratedCommandsInfo->preprocessBuffer); struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline); @@ -1160,13 +1081,12 @@ radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, uint32_t cmd_stride, upload_stride; radv_get_sequence_size(layout, graphics_pipeline, &cmd_stride, &upload_stride); - unsigned cmd_buf_size = - radv_align_cmdbuf_size(cmd_stride * pGeneratedCommandsInfo->sequencesCount); + unsigned cmd_buf_size = radv_align_cmdbuf_size(cmd_stride * pGeneratedCommandsInfo->sequencesCount); unsigned vb_size = layout->bind_vbo_mask ? 
util_bitcount(vs->info.vs.vb_desc_usage_mask) * 24 : 0; unsigned const_size = graphics_pipeline->base.push_constant_size + - 16 * graphics_pipeline->base.dynamic_offset_count + - sizeof(layout->push_constant_offsets) + ARRAY_SIZE(graphics_pipeline->base.shaders) * 12; + 16 * graphics_pipeline->base.dynamic_offset_count + sizeof(layout->push_constant_offsets) + + ARRAY_SIZE(graphics_pipeline->base.shaders) * 12; if (!layout->push_constant_mask) const_size = 0; @@ -1186,25 +1106,22 @@ radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, void *upload_data_base = upload_data; - radv_buffer_init(&token_buffer, cmd_buffer->device, cmd_buffer->upload.upload_bo, upload_size, - upload_offset); + radv_buffer_init(&token_buffer, cmd_buffer->device, cmd_buffer->upload.upload_bo, upload_size, upload_offset); - uint64_t upload_addr = radv_buffer_get_va(prep_buffer->bo) + prep_buffer->offset + - pGeneratedCommandsInfo->preprocessOffset; + uint64_t upload_addr = + radv_buffer_get_va(prep_buffer->bo) + prep_buffer->offset + pGeneratedCommandsInfo->preprocessOffset; - uint16_t vtx_base_sgpr = - (cmd_buffer->state.graphics_pipeline->vtx_base_sgpr - SI_SH_REG_OFFSET) >> 2; + uint16_t vtx_base_sgpr = (cmd_buffer->state.graphics_pipeline->vtx_base_sgpr - SI_SH_REG_OFFSET) >> 2; if (cmd_buffer->state.graphics_pipeline->uses_drawid) vtx_base_sgpr |= DGC_USES_DRAWID; if (cmd_buffer->state.graphics_pipeline->uses_baseinstance) vtx_base_sgpr |= DGC_USES_BASEINSTANCE; - const struct radv_shader *vertex_shader = - radv_get_shader(graphics_pipeline->base.shaders, MESA_SHADER_VERTEX); - uint16_t vbo_sgpr = ((radv_get_user_sgpr(vertex_shader, AC_UD_VS_VERTEX_BUFFERS)->sgpr_idx * 4 + - vertex_shader->info.user_data_0) - - SI_SH_REG_OFFSET) >> - 2; + const struct radv_shader *vertex_shader = radv_get_shader(graphics_pipeline->base.shaders, MESA_SHADER_VERTEX); + uint16_t vbo_sgpr = + ((radv_get_user_sgpr(vertex_shader, AC_UD_VS_VERTEX_BUFFERS)->sgpr_idx * 4 + vertex_shader->info.user_data_0) - + SI_SH_REG_OFFSET) >> + 2; struct radv_dgc_params params = { .cmd_buf_stride = cmd_stride, .cmd_buf_size = cmd_buf_size, @@ -1214,8 +1131,7 @@ radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, .stream_stride = layout->input_stride, .draw_indexed = layout->indexed, .draw_params_offset = layout->draw_params_offset, - .base_index_size = - layout->binds_index_buffer ? 0 : radv_get_vgt_index_size(cmd_buffer->state.index_type), + .base_index_size = layout->binds_index_buffer ? 0 : radv_get_vgt_index_size(cmd_buffer->state.index_type), .vtx_base_sgpr = vtx_base_sgpr, .max_index_count = cmd_buffer->state.max_index_count, .index_buffer_offset = layout->index_buffer_offset, @@ -1243,8 +1159,8 @@ radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, uint32_t attrib_end = graphics_pipeline->attrib_ends[i]; params.vbo_bind_mask |= ((layout->bind_vbo_mask >> binding) & 1u) << idx; - vbo_info[2 * idx] = ((vertex_shader->info.vs.use_per_attribute_vb_descs ? 1u : 0u) << 31) | - layout->vbo_offsets[binding]; + vbo_info[2 * idx] = + ((vertex_shader->info.vs.use_per_attribute_vb_descs ? 
1u : 0u) << 31) | layout->vbo_offsets[binding]; vbo_info[2 * idx + 1] = graphics_pipeline->attrib_index_offset[i] | (attrib_end << 16); ++idx; } @@ -1272,15 +1188,13 @@ radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, unsigned inline_sgpr = 0; if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) { - upload_sgpr = - (shader->info.user_data_0 + 4 * locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx - - SI_SH_REG_OFFSET) >> - 2; + upload_sgpr = (shader->info.user_data_0 + 4 * locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx - + SI_SH_REG_OFFSET) >> + 2; } if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) { - inline_sgpr = (shader->info.user_data_0 + - 4 * locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx - + inline_sgpr = (shader->info.user_data_0 + 4 * locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx - SI_SH_REG_OFFSET) >> 2; desc[idx * 3 + 1] = graphics_pipeline->base.shaders[i]->info.inline_push_constant_mask; @@ -1293,8 +1207,8 @@ radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, params.push_constant_shader_cnt = idx; - params.const_copy_size = graphics_pipeline->base.push_constant_size + - 16 * graphics_pipeline->base.dynamic_offset_count; + params.const_copy_size = + graphics_pipeline->base.push_constant_size + 16 * graphics_pipeline->base.dynamic_offset_count; params.push_constant_mask = layout->push_constant_mask; memcpy(upload_data, layout->push_constant_offsets, sizeof(layout->push_constant_offsets)); @@ -1310,14 +1224,10 @@ radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, } if (scissor_size) { - params.scissor_offset = (char*)upload_data - (char*)upload_data_base; + params.scissor_offset = (char *)upload_data - (char *)upload_data_base; params.scissor_count = scissor_size / 4; - struct radeon_cmdbuf scissor_cs = { - .buf = upload_data, - .cdw = 0, - .max_dw = scissor_size / 4 - }; + struct radeon_cmdbuf scissor_cs = {.buf = upload_data, .cdw = 0, .max_dw = scissor_size / 4}; radv_write_scissors(cmd_buffer, &scissor_cs); assert(scissor_cs.cdw * 4 == scissor_size); @@ -1327,9 +1237,8 @@ radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, VkWriteDescriptorSet ds_writes[5]; VkDescriptorBufferInfo buf_info[ARRAY_SIZE(ds_writes)]; int ds_cnt = 0; - buf_info[ds_cnt] = (VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&token_buffer), - .offset = 0, - .range = upload_size}; + buf_info[ds_cnt] = + (VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&token_buffer), .offset = 0, .range = upload_size}; ds_writes[ds_cnt] = (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .dstBinding = DGC_DESC_PARAMS, .dstArrayElement = 0, @@ -1350,50 +1259,43 @@ radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, ++ds_cnt; if (pGeneratedCommandsInfo->streamCount > 0) { - buf_info[ds_cnt] = - (VkDescriptorBufferInfo){.buffer = pGeneratedCommandsInfo->pStreams[0].buffer, - .offset = pGeneratedCommandsInfo->pStreams[0].offset, - .range = VK_WHOLE_SIZE}; - ds_writes[ds_cnt] = - (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = DGC_DESC_STREAM, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .pBufferInfo = &buf_info[ds_cnt]}; + buf_info[ds_cnt] = (VkDescriptorBufferInfo){.buffer = pGeneratedCommandsInfo->pStreams[0].buffer, + .offset = pGeneratedCommandsInfo->pStreams[0].offset, + .range = VK_WHOLE_SIZE}; + ds_writes[ds_cnt] = (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = DGC_DESC_STREAM, + 
.dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .pBufferInfo = &buf_info[ds_cnt]}; ++ds_cnt; } if (pGeneratedCommandsInfo->sequencesCountBuffer != VK_NULL_HANDLE) { - buf_info[ds_cnt] = - (VkDescriptorBufferInfo){.buffer = pGeneratedCommandsInfo->sequencesCountBuffer, - .offset = pGeneratedCommandsInfo->sequencesCountOffset, - .range = VK_WHOLE_SIZE}; - ds_writes[ds_cnt] = - (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = DGC_DESC_COUNT, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .pBufferInfo = &buf_info[ds_cnt]}; + buf_info[ds_cnt] = (VkDescriptorBufferInfo){.buffer = pGeneratedCommandsInfo->sequencesCountBuffer, + .offset = pGeneratedCommandsInfo->sequencesCountOffset, + .range = VK_WHOLE_SIZE}; + ds_writes[ds_cnt] = (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = DGC_DESC_COUNT, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .pBufferInfo = &buf_info[ds_cnt]}; ++ds_cnt; params.sequence_count |= 1u << 31; } - radv_meta_save( - &saved_state, cmd_buffer, - RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS); + radv_meta_save(&saved_state, cmd_buffer, + RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS); radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, cmd_buffer->device->meta_state.dgc_prepare.pipeline); - radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), - cmd_buffer->device->meta_state.dgc_prepare.p_layout, + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), cmd_buffer->device->meta_state.dgc_prepare.p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(params), &params); radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - cmd_buffer->device->meta_state.dgc_prepare.p_layout, 0, ds_cnt, - ds_writes); + cmd_buffer->device->meta_state.dgc_prepare.p_layout, 0, ds_cnt, ds_writes); unsigned block_count = MAX2(1, round_up_u32(pGeneratedCommandsInfo->sequencesCount, 64)); vk_common_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1); @@ -1401,6 +1303,5 @@ radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, radv_buffer_finish(&token_buffer); radv_meta_restore(&saved_state, cmd_buffer); - cmd_buffer->state.flush_bits |= - RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2; + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2; } diff --git a/src/amd/vulkan/radv_device_memory.c b/src/amd/vulkan/radv_device_memory.c index 9aad1ed..c6f5bf0 100644 --- a/src/amd/vulkan/radv_device_memory.c +++ b/src/amd/vulkan/radv_device_memory.c @@ -28,8 +28,7 @@ #include "radv_private.h" void -radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device, - struct radeon_winsys_bo *bo) +radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device, struct radeon_winsys_bo *bo) { memset(mem, 0, sizeof(*mem)); vk_object_base_init(&device->vk, &mem->base, VK_OBJECT_TYPE_DEVICE_MEMORY); @@ -44,8 +43,7 @@ radv_device_memory_finish(struct radv_device_memory *mem) } void -radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator, - struct radv_device_memory *mem) +radv_free_memory(struct radv_device *device, const VkAllocationCallbacks 
*pAllocator, struct radv_device_memory *mem) { if (mem == NULL) return; @@ -86,8 +84,7 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); - const VkImportMemoryFdInfoKHR *import_info = - vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR); + const VkImportMemoryFdInfoKHR *import_info = vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR); const VkMemoryDedicatedAllocateInfo *dedicate_info = vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO); const VkExportMemoryAllocateInfo *export_info = @@ -103,15 +100,14 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA); if (pAllocateInfo->allocationSize == 0 && !ahb_import_info && - !(export_info && (export_info->handleTypes & - VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID))) { + !(export_info && + (export_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID))) { /* Apparently, this is allowed */ *pMem = VK_NULL_HANDLE; return VK_SUCCESS; } - mem = - vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + mem = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (mem == NULL) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -147,8 +143,8 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc if (replay_info && replay_info->opaqueCaptureAddress) replay_address = replay_info->opaqueCaptureAddress; - unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1, - (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX)); + unsigned priority = + MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1, (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX)); mem->user_ptr = NULL; @@ -160,8 +156,8 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc result = radv_import_ahb_memory(device, mem, priority, ahb_import_info); if (result != VK_SUCCESS) goto fail; - } else if (export_info && (export_info->handleTypes & - VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)) { + } else if (export_info && + (export_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)) { result = radv_create_ahb_memory(device, mem, priority, pAllocateInfo); if (result != VK_SUCCESS) goto fail; @@ -175,14 +171,12 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc close(import_info->fd); } - if (mem->image && mem->image->plane_count == 1 && - !vk_format_is_depth_or_stencil(mem->image->vk.format) && mem->image->vk.samples == 1 && - mem->image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) { + if (mem->image && mem->image->plane_count == 1 && !vk_format_is_depth_or_stencil(mem->image->vk.format) && + mem->image->vk.samples == 1 && mem->image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) { struct radeon_bo_metadata metadata; device->ws->buffer_get_metadata(device->ws, mem->bo, &metadata); - struct radv_image_create_info create_info = {.no_metadata_planes = true, - .bo_metadata = &metadata}; + struct radv_image_create_info create_info = {.no_metadata_planes = true, .bo_metadata = &metadata}; /* This gives a basic ability to import radeonsi images * that don't have DCC. 
This is not guaranteed by any @@ -195,8 +189,8 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc } } else if (host_ptr_info) { assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT); - result = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer, - pAllocateInfo->allocationSize, priority, &mem->bo); + result = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer, pAllocateInfo->allocationSize, + priority, &mem->bo); if (result != VK_SUCCESS) { goto fail; } else { @@ -206,9 +200,7 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096); uint32_t heap_index; - heap_index = - device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex] - .heapIndex; + heap_index = device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex].heapIndex; domain = device->physical_device->memory_domains[pAllocateInfo->memoryTypeIndex]; flags |= device->physical_device->memory_flags[pAllocateInfo->memoryTypeIndex]; @@ -236,8 +228,7 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc flags |= RADEON_FLAG_ZERO_VRAM; if (device->overallocation_disallowed) { - uint64_t total_size = - device->physical_device->memory_properties.memoryHeaps[heap_index].size; + uint64_t total_size = device->physical_device->memory_properties.memoryHeaps[heap_index].size; mtx_lock(&device->overallocation_mutex); if (device->allocated_memory_size[heap_index] + alloc_size > total_size) { @@ -249,9 +240,8 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc mtx_unlock(&device->overallocation_mutex); } - result = device->ws->buffer_create(device->ws, alloc_size, - device->physical_device->rad_info.max_alignment, domain, - flags, priority, replay_address, &mem->bo); + result = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment, + domain, flags, priority, replay_address, &mem->bo); if (result != VK_SUCCESS) { if (device->overallocation_disallowed) { @@ -335,30 +325,26 @@ radv_UnmapMemory2KHR(VkDevice _device, const VkMemoryUnmapInfoKHR *pMemoryUnmapI } VKAPI_ATTR VkResult VKAPI_CALL -radv_FlushMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount, - const VkMappedMemoryRange *pMemoryRanges) +radv_FlushMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount, const VkMappedMemoryRange *pMemoryRanges) { return VK_SUCCESS; } VKAPI_ATTR VkResult VKAPI_CALL -radv_InvalidateMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount, - const VkMappedMemoryRange *pMemoryRanges) +radv_InvalidateMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount, const VkMappedMemoryRange *pMemoryRanges) { return VK_SUCCESS; } VKAPI_ATTR uint64_t VKAPI_CALL -radv_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device, - const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo) +radv_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device, const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo) { RADV_FROM_HANDLE(radv_device_memory, mem, pInfo->memory); return radv_buffer_get_va(mem->bo); } VKAPI_ATTR void VKAPI_CALL -radv_GetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory memory, - VkDeviceSize *pCommittedMemoryInBytes) +radv_GetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory memory, VkDeviceSize *pCommittedMemoryInBytes) { *pCommittedMemoryInBytes = 0; } diff --git 
a/src/amd/vulkan/radv_event.c b/src/amd/vulkan/radv_event.c index 476d22c..56414dc 100644 --- a/src/amd/vulkan/radv_event.c +++ b/src/amd/vulkan/radv_event.c @@ -28,8 +28,7 @@ #include "radv_private.h" static void -radv_destroy_event(struct radv_device *device, const VkAllocationCallbacks *pAllocator, - struct radv_event *event) +radv_destroy_event(struct radv_device *device, const VkAllocationCallbacks *pAllocator, struct radv_event *event) { if (event->bo) device->ws->buffer_destroy(device->ws, event->bo); @@ -47,8 +46,7 @@ radv_create_event(struct radv_device *device, const VkEventCreateInfo *pCreateIn struct radv_event *event; VkResult result; - event = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*event), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + event = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*event), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!event) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -62,10 +60,9 @@ radv_create_event(struct radv_device *device, const VkEventCreateInfo *pCreateIn bo_flags = RADEON_FLAG_CPU_ACCESS; } - result = device->ws->buffer_create( - device->ws, 8, 8, bo_domain, - RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_NO_INTERPROCESS_SHARING | bo_flags, - RADV_BO_PRIORITY_FENCE, 0, &event->bo); + result = device->ws->buffer_create(device->ws, 8, 8, bo_domain, + RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_NO_INTERPROCESS_SHARING | bo_flags, + RADV_BO_PRIORITY_FENCE, 0, &event->bo); if (result != VK_SUCCESS) { radv_destroy_event(device, pAllocator, event); return vk_error(device, result); @@ -85,8 +82,8 @@ radv_create_event(struct radv_device *device, const VkEventCreateInfo *pCreateIn } VKAPI_ATTR VkResult VKAPI_CALL -radv_CreateEvent(VkDevice _device, const VkEventCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, VkEvent *pEvent) +radv_CreateEvent(VkDevice _device, const VkEventCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, + VkEvent *pEvent) { RADV_FROM_HANDLE(radv_device, device, _device); VkResult result = radv_create_event(device, pCreateInfo, pAllocator, pEvent, false); diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c index d652210..1133cd3 100644 --- a/src/amd/vulkan/radv_formats.c +++ b/src/amd/vulkan/radv_formats.c @@ -31,13 +31,13 @@ #include "vk_android.h" #include "vk_util.h" -#include "ac_drm_fourcc.h" #include "util/format_r11g11b10f.h" #include "util/format_rgb9e5.h" #include "util/format_srgb.h" #include "util/half_float.h" -#include "vulkan/util/vk_format.h" #include "vulkan/util/vk_enum_defines.h" +#include "vulkan/util/vk_format.h" +#include "ac_drm_fourcc.h" uint32_t radv_translate_buffer_dataformat(const struct util_format_description *desc, int first_non_void) @@ -181,8 +181,7 @@ radv_is_vertex_buffer_format_supported(VkFormat format) } uint32_t -radv_translate_tex_dataformat(VkFormat format, const struct util_format_description *desc, - int first_non_void) +radv_translate_tex_dataformat(VkFormat format, const struct util_format_description *desc, int first_non_void) { bool uniform = true; int i; @@ -316,22 +315,21 @@ radv_translate_tex_dataformat(VkFormat format, const struct util_format_descript if (!uniform) { switch (desc->nr_channels) { case 3: - if (desc->channel[0].size == 5 && desc->channel[1].size == 6 && - desc->channel[2].size == 5) { + if (desc->channel[0].size == 5 && desc->channel[1].size == 6 && desc->channel[2].size == 5) { return V_008F14_IMG_DATA_FORMAT_5_6_5; } goto out_unknown; case 4: - if (desc->channel[0].size == 5 && desc->channel[1].size == 5 
&& - desc->channel[2].size == 5 && desc->channel[3].size == 1) { + if (desc->channel[0].size == 5 && desc->channel[1].size == 5 && desc->channel[2].size == 5 && + desc->channel[3].size == 1) { return V_008F14_IMG_DATA_FORMAT_1_5_5_5; } - if (desc->channel[0].size == 1 && desc->channel[1].size == 5 && - desc->channel[2].size == 5 && desc->channel[3].size == 5) { + if (desc->channel[0].size == 1 && desc->channel[1].size == 5 && desc->channel[2].size == 5 && + desc->channel[3].size == 5) { return V_008F14_IMG_DATA_FORMAT_5_5_5_1; } - if (desc->channel[0].size == 10 && desc->channel[1].size == 10 && - desc->channel[2].size == 10 && desc->channel[3].size == 2) { + if (desc->channel[0].size == 10 && desc->channel[1].size == 10 && desc->channel[2].size == 10 && + desc->channel[3].size == 2) { /* Closed VK driver does this also no 2/10/10/10 snorm */ if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED && desc->channel[0].normalized) goto out_unknown; @@ -401,8 +399,7 @@ out_unknown: } uint32_t -radv_translate_tex_numformat(VkFormat format, const struct util_format_description *desc, - int first_non_void) +radv_translate_tex_numformat(VkFormat format, const struct util_format_description *desc, int first_non_void) { assert(vk_format_get_plane_count(format) == 1); @@ -468,14 +465,11 @@ radv_is_sampler_format_supported(VkFormat format, bool *linear_sampling) { const struct util_format_description *desc = vk_format_description(format); uint32_t num_format; - if (format == VK_FORMAT_UNDEFINED || format == VK_FORMAT_R64_UINT || - format == VK_FORMAT_R64_SINT) + if (format == VK_FORMAT_UNDEFINED || format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT) return false; - num_format = - radv_translate_tex_numformat(format, desc, vk_format_get_first_non_void_channel(format)); + num_format = radv_translate_tex_numformat(format, desc, vk_format_get_first_non_void_channel(format)); - if (num_format == V_008F14_IMG_NUM_FORMAT_USCALED || - num_format == V_008F14_IMG_NUM_FORMAT_SSCALED) + if (num_format == V_008F14_IMG_NUM_FORMAT_USCALED || num_format == V_008F14_IMG_NUM_FORMAT_SSCALED) return false; if (num_format == V_008F14_IMG_NUM_FORMAT_UNORM || num_format == V_008F14_IMG_NUM_FORMAT_SNORM || @@ -490,14 +484,12 @@ radv_is_sampler_format_supported(VkFormat format, bool *linear_sampling) bool radv_is_atomic_format_supported(VkFormat format) { - return format == VK_FORMAT_R32_UINT || format == VK_FORMAT_R32_SINT || - format == VK_FORMAT_R32_SFLOAT || format == VK_FORMAT_R64_UINT || - format == VK_FORMAT_R64_SINT; + return format == VK_FORMAT_R32_UINT || format == VK_FORMAT_R32_SINT || format == VK_FORMAT_R32_SFLOAT || + format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT; } bool -radv_is_storage_image_format_supported(const struct radv_physical_device *physical_device, - VkFormat format) +radv_is_storage_image_format_supported(const struct radv_physical_device *physical_device, VkFormat format) { const struct util_format_description *desc = vk_format_description(format); unsigned data_format, num_format; @@ -507,10 +499,8 @@ radv_is_storage_image_format_supported(const struct radv_physical_device *physic if (vk_format_is_depth_or_stencil(format)) return false; - data_format = - radv_translate_tex_dataformat(format, desc, vk_format_get_first_non_void_channel(format)); - num_format = - radv_translate_tex_numformat(format, desc, vk_format_get_first_non_void_channel(format)); + data_format = radv_translate_tex_dataformat(format, desc, vk_format_get_first_non_void_channel(format)); + num_format = 
radv_translate_tex_numformat(format, desc, vk_format_get_first_non_void_channel(format)); if (data_format == ~0 || num_format == ~0) return false; @@ -562,19 +552,16 @@ radv_is_buffer_format_supported(VkFormat format, bool *scaled) if (format == VK_FORMAT_UNDEFINED) return false; - data_format = - radv_translate_buffer_dataformat(desc, vk_format_get_first_non_void_channel(format)); + data_format = radv_translate_buffer_dataformat(desc, vk_format_get_first_non_void_channel(format)); num_format = radv_translate_buffer_numformat(desc, vk_format_get_first_non_void_channel(format)); if (scaled) - *scaled = (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) || - (num_format == V_008F0C_BUF_NUM_FORMAT_USCALED); + *scaled = (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) || (num_format == V_008F0C_BUF_NUM_FORMAT_USCALED); return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID && num_format != ~0; } bool -radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice, VkFormat format, - bool *blendable) +radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice, VkFormat format, bool *blendable) { const struct util_format_description *desc = vk_format_description(format); uint32_t color_format = ac_get_cb_format(pdevice->rad_info.gfx_level, desc->format); @@ -633,15 +620,13 @@ radv_is_filter_minmax_format_supported(VkFormat format) bool radv_device_supports_etc(const struct radv_physical_device *physical_device) { - return physical_device->rad_info.family == CHIP_VEGA10 || - physical_device->rad_info.family == CHIP_RAVEN || - physical_device->rad_info.family == CHIP_RAVEN2 || - physical_device->rad_info.family == CHIP_STONEY; + return physical_device->rad_info.family == CHIP_VEGA10 || physical_device->rad_info.family == CHIP_RAVEN || + physical_device->rad_info.family == CHIP_RAVEN2 || physical_device->rad_info.family == CHIP_STONEY; } static void -radv_physical_device_get_format_properties(struct radv_physical_device *physical_device, - VkFormat format, VkFormatProperties3 *out_properties) +radv_physical_device_get_format_properties(struct radv_physical_device *physical_device, VkFormat format, + VkFormatProperties3 *out_properties) { VkFormatFeatureFlags2 linear = 0, tiled = 0, buffer = 0; const struct util_format_description *desc = vk_format_description(format); @@ -665,14 +650,11 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical const bool multiplanar = vk_format_get_plane_count(format) > 1; if (multiplanar || desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { - uint64_t tiling = VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT | - VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT | - VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT | - VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_LINEAR_BIT; + uint64_t tiling = VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT | + VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_LINEAR_BIT; if (vk_format_get_ycbcr_info(format)) { - tiling |= VK_FORMAT_FEATURE_2_COSITED_CHROMA_SAMPLES_BIT | - VK_FORMAT_FEATURE_2_MIDPOINT_CHROMA_SAMPLES_BIT; + tiling |= VK_FORMAT_FEATURE_2_COSITED_CHROMA_SAMPLES_BIT | VK_FORMAT_FEATURE_2_MIDPOINT_CHROMA_SAMPLES_BIT; /* The subsampled formats have no support for linear filters. 
*/ if (desc->layout != UTIL_FORMAT_LAYOUT_SUBSAMPLED) @@ -680,28 +662,25 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical } if (physical_device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE) { - if (format == VK_FORMAT_G8_B8R8_2PLANE_420_UNORM || - format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) - tiling |= VK_FORMAT_FEATURE_2_VIDEO_DECODE_OUTPUT_BIT_KHR | VK_FORMAT_FEATURE_2_VIDEO_DECODE_DPB_BIT_KHR; + if (format == VK_FORMAT_G8_B8R8_2PLANE_420_UNORM || + format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) + tiling |= VK_FORMAT_FEATURE_2_VIDEO_DECODE_OUTPUT_BIT_KHR | VK_FORMAT_FEATURE_2_VIDEO_DECODE_DPB_BIT_KHR; } if (multiplanar) tiling |= VK_FORMAT_FEATURE_2_DISJOINT_BIT; /* Fails for unknown reasons with linear tiling & subsampled formats. */ - out_properties->linearTilingFeatures = - desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED ? 0 : tiling; + out_properties->linearTilingFeatures = desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED ? 0 : tiling; out_properties->optimalTilingFeatures = tiling; out_properties->bufferFeatures = 0; return; } if (radv_is_storage_image_format_supported(physical_device, format)) { - tiled |= VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT | - VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT | + tiled |= VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT | VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT; - linear |= VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT | - VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT | + linear |= VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT | VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT; } @@ -709,11 +688,9 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical buffer |= VK_FORMAT_FEATURE_2_VERTEX_BUFFER_BIT; if (radv_is_buffer_format_supported(format, &scaled)) { - if (format != VK_FORMAT_R64_UINT && format != VK_FORMAT_R64_SINT && !scaled && - !vk_format_is_srgb(format)) + if (format != VK_FORMAT_R64_UINT && format != VK_FORMAT_R64_SINT && !scaled && !vk_format_is_srgb(format)) buffer |= VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT; - buffer |= VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT | - VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT | + buffer |= VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT | VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT; } @@ -793,8 +770,7 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical case VK_FORMAT_A2B10G10R10_SSCALED_PACK32: case VK_FORMAT_A2R10G10B10_SINT_PACK32: case VK_FORMAT_A2B10G10R10_SINT_PACK32: - buffer &= - ~(VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT); + buffer &= ~(VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT); linear = 0; tiled = 0; break; @@ -933,8 +909,7 @@ radv_translate_colorswap(VkFormat format, bool do_endian_swap) if ((HAS_SWIZZLE(0, X) && HAS_SWIZZLE(1, Y)) || (HAS_SWIZZLE(0, X) && HAS_SWIZZLE(1, NONE)) || (HAS_SWIZZLE(0, NONE) && HAS_SWIZZLE(1, Y))) return V_028C70_SWAP_STD; /* XY__ */ - else if ((HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(1, X)) || - (HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(1, NONE)) || + else if ((HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(1, X)) || (HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(1, NONE)) || (HAS_SWIZZLE(0, NONE) && HAS_SWIZZLE(1, X))) /* YX__ */ return (do_endian_swap ? 
V_028C70_SWAP_STD : V_028C70_SWAP_STD_REV); @@ -1057,8 +1032,7 @@ radv_format_pack_clear_color(VkFormat format, uint32_t clear_vals[2], VkClearCol return false; } } else { - fprintf(stderr, "failed to fast clear for unhandled component type in format %d\n", - format); + fprintf(stderr, "failed to fast clear for unhandled component type in format %d\n", format); return false; } clear_val |= (v & ((1ULL << channel->size) - 1)) << channel->shift; @@ -1108,8 +1082,7 @@ radv_get_modifier_flags(struct radv_physical_device *dev, VkFormat format, uint6 static void radv_list_drm_format_modifiers(struct radv_physical_device *dev, VkFormat format, - const VkFormatProperties3 *format_props, - VkDrmFormatModifierPropertiesListEXT *mod_list) + const VkFormatProperties3 *format_props, VkDrmFormatModifierPropertiesListEXT *mod_list) { unsigned mod_count; @@ -1121,12 +1094,11 @@ radv_list_drm_format_modifiers(struct radv_physical_device *dev, VkFormat format return; } - VK_OUTARRAY_MAKE_TYPED(VkDrmFormatModifierPropertiesEXT, out, - mod_list->pDrmFormatModifierProperties, + VK_OUTARRAY_MAKE_TYPED(VkDrmFormatModifierPropertiesEXT, out, mod_list->pDrmFormatModifierProperties, &mod_list->drmFormatModifierCount); - ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options, - vk_format_to_pipe_format(format), &mod_count, NULL); + ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options, vk_format_to_pipe_format(format), &mod_count, + NULL); uint64_t *mods = malloc(mod_count * sizeof(uint64_t)); if (!mods) { @@ -1134,12 +1106,11 @@ radv_list_drm_format_modifiers(struct radv_physical_device *dev, VkFormat format mod_list->drmFormatModifierCount = 0; return; } - ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options, - vk_format_to_pipe_format(format), &mod_count, mods); + ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options, vk_format_to_pipe_format(format), &mod_count, + mods); for (unsigned i = 0; i < mod_count; ++i) { - VkFormatFeatureFlags2 features = - radv_get_modifier_flags(dev, format, mods[i], format_props); + VkFormatFeatureFlags2 features = radv_get_modifier_flags(dev, format, mods[i], format_props); unsigned planes = vk_format_get_plane_count(format); if (planes == 1) { if (ac_modifier_has_dcc_retile(mods[i])) @@ -1151,12 +1122,12 @@ radv_list_drm_format_modifiers(struct radv_physical_device *dev, VkFormat format if (!features) continue; - vk_outarray_append_typed(VkDrmFormatModifierPropertiesEXT, &out, out_props) { - *out_props = (VkDrmFormatModifierPropertiesEXT) { + vk_outarray_append_typed(VkDrmFormatModifierPropertiesEXT, &out, out_props) + { + *out_props = (VkDrmFormatModifierPropertiesEXT){ .drmFormatModifier = mods[i], .drmFormatModifierPlaneCount = planes, - .drmFormatModifierTilingFeatures = - vk_format_features2_to_features(features), + .drmFormatModifierTilingFeatures = vk_format_features2_to_features(features), }; }; } @@ -1179,12 +1150,11 @@ radv_list_drm_format_modifiers_2(struct radv_physical_device *dev, VkFormat form return; } - VK_OUTARRAY_MAKE_TYPED(VkDrmFormatModifierProperties2EXT, out, - mod_list->pDrmFormatModifierProperties, + VK_OUTARRAY_MAKE_TYPED(VkDrmFormatModifierProperties2EXT, out, mod_list->pDrmFormatModifierProperties, &mod_list->drmFormatModifierCount); - ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options, - vk_format_to_pipe_format(format), &mod_count, NULL); + ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options, vk_format_to_pipe_format(format), &mod_count, + NULL); uint64_t *mods = 
malloc(mod_count * sizeof(uint64_t)); if (!mods) { @@ -1192,12 +1162,11 @@ radv_list_drm_format_modifiers_2(struct radv_physical_device *dev, VkFormat form mod_list->drmFormatModifierCount = 0; return; } - ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options, - vk_format_to_pipe_format(format), &mod_count, mods); + ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options, vk_format_to_pipe_format(format), &mod_count, + mods); for (unsigned i = 0; i < mod_count; ++i) { - VkFormatFeatureFlags2 features = - radv_get_modifier_flags(dev, format, mods[i], format_props); + VkFormatFeatureFlags2 features = radv_get_modifier_flags(dev, format, mods[i], format_props); unsigned planes = vk_format_get_plane_count(format); if (planes == 1) { if (ac_modifier_has_dcc_retile(mods[i])) @@ -1209,8 +1178,9 @@ radv_list_drm_format_modifiers_2(struct radv_physical_device *dev, VkFormat form if (!features) continue; - vk_outarray_append_typed(VkDrmFormatModifierProperties2EXT, &out, out_props) { - *out_props = (VkDrmFormatModifierProperties2EXT) { + vk_outarray_append_typed(VkDrmFormatModifierProperties2EXT, &out, out_props) + { + *out_props = (VkDrmFormatModifierProperties2EXT){ .drmFormatModifier = mods[i], .drmFormatModifierPlaneCount = planes, .drmFormatModifierTilingFeatures = features, @@ -1222,8 +1192,7 @@ radv_list_drm_format_modifiers_2(struct radv_physical_device *dev, VkFormat form } static VkResult -radv_check_modifier_support(struct radv_physical_device *dev, - const VkPhysicalDeviceImageFormatInfo2 *info, +radv_check_modifier_support(struct radv_physical_device *dev, const VkPhysicalDeviceImageFormatInfo2 *info, VkImageFormatProperties *props, VkFormat format, uint64_t modifier) { const struct util_format_description *desc = vk_format_description(format); @@ -1236,8 +1205,8 @@ radv_check_modifier_support(struct radv_physical_device *dev, return VK_ERROR_FORMAT_NOT_SUPPORTED; /* We did not add modifiers for sparse textures. 
*/ - if (info->flags & (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT | - VK_IMAGE_CREATE_SPARSE_ALIASED_BIT)) + if (info->flags & + (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT | VK_IMAGE_CREATE_SPARSE_ALIASED_BIT)) return VK_ERROR_FORMAT_NOT_SUPPORTED; /* @@ -1252,11 +1221,9 @@ radv_check_modifier_support(struct radv_physical_device *dev, .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT, }; - VkFormatProperties2 format_props2 = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, - .pNext = &mod_list}; + VkFormatProperties2 format_props2 = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, .pNext = &mod_list}; - radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(dev), format, - &format_props2); + radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(dev), format, &format_props2); if (!mod_list.drmFormatModifierCount) return VK_ERROR_FORMAT_NOT_SUPPORTED; @@ -1266,8 +1233,7 @@ radv_check_modifier_support(struct radv_physical_device *dev, if (!mod_list.pDrmFormatModifierProperties) return VK_ERROR_OUT_OF_HOST_MEMORY; - radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(dev), format, - &format_props2); + radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(dev), format, &format_props2); bool found = false; for (uint32_t i = 0; i < mod_list.drmFormatModifierCount && !found; ++i) @@ -1281,8 +1247,7 @@ radv_check_modifier_support(struct radv_physical_device *dev, bool need_dcc_sign_reinterpret = false; if (ac_modifier_has_dcc(modifier) && - !radv_are_formats_dcc_compatible(dev, info->pNext, format, info->flags, - &need_dcc_sign_reinterpret) && + !radv_are_formats_dcc_compatible(dev, info->pNext, format, info->flags, &need_dcc_sign_reinterpret) && !need_dcc_sign_reinterpret) return VK_ERROR_FORMAT_NOT_SUPPORTED; @@ -1315,23 +1280,19 @@ radv_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice, VkForma vk_format_features2_to_features(format_props.linearTilingFeatures); pFormatProperties->formatProperties.optimalTilingFeatures = vk_format_features2_to_features(format_props.optimalTilingFeatures); - pFormatProperties->formatProperties.bufferFeatures = - vk_format_features2_to_features(format_props.bufferFeatures); + pFormatProperties->formatProperties.bufferFeatures = vk_format_features2_to_features(format_props.bufferFeatures); - VkFormatProperties3 *format_props_extended = - vk_find_struct(pFormatProperties, FORMAT_PROPERTIES_3); + VkFormatProperties3 *format_props_extended = vk_find_struct(pFormatProperties, FORMAT_PROPERTIES_3); if (format_props_extended) { format_props_extended->linearTilingFeatures = format_props.linearTilingFeatures; format_props_extended->optimalTilingFeatures = format_props.optimalTilingFeatures; format_props_extended->bufferFeatures = format_props.bufferFeatures; } - radv_list_drm_format_modifiers( - physical_device, format, &format_props, - vk_find_struct(pFormatProperties, DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT)); - radv_list_drm_format_modifiers_2( - physical_device, format, &format_props, - vk_find_struct(pFormatProperties, DRM_FORMAT_MODIFIER_PROPERTIES_LIST_2_EXT)); + radv_list_drm_format_modifiers(physical_device, format, &format_props, + vk_find_struct(pFormatProperties, DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT)); + radv_list_drm_format_modifiers_2(physical_device, format, &format_props, + vk_find_struct(pFormatProperties, DRM_FORMAT_MODIFIER_PROPERTIES_LIST_2_EXT)); } static VkResult @@ -1359,8 +1320,8 @@ 
radv_get_image_format_properties(struct radv_physical_device *physical_device, } else if (tiling == VK_IMAGE_TILING_OPTIMAL) { format_feature_flags = format_props.optimalTilingFeatures; } else if (tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) { - format_feature_flags = radv_get_modifier_flags(physical_device, format, - mod_info->drmFormatModifier, &format_props); + format_feature_flags = + radv_get_modifier_flags(physical_device, format, mod_info->drmFormatModifier, &format_props); } else { unreachable("bad VkImageTiling"); } @@ -1410,16 +1371,15 @@ radv_get_image_format_properties(struct radv_physical_device *physical_device, } if (tiling == VK_IMAGE_TILING_OPTIMAL && info->type == VK_IMAGE_TYPE_2D && - (format_feature_flags & (VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT | - VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT)) && + (format_feature_flags & + (VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT)) && !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && !(info->usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)) { sampleCounts |= VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT; } - if (tiling == VK_IMAGE_TILING_LINEAR && - (format == VK_FORMAT_R32G32B32_SFLOAT || format == VK_FORMAT_R32G32B32_SINT || - format == VK_FORMAT_R32G32B32_UINT)) { + if (tiling == VK_IMAGE_TILING_LINEAR && (format == VK_FORMAT_R32G32B32_SFLOAT || + format == VK_FORMAT_R32G32B32_SINT || format == VK_FORMAT_R32G32B32_UINT)) { /* R32G32B32 is a weird format and the driver currently only * supports the barely minimum. * TODO: Implement more if we really need to. @@ -1434,8 +1394,7 @@ radv_get_image_format_properties(struct radv_physical_device *physical_device, if (physical_device->rad_info.gfx_level >= GFX9 && info->type == VK_IMAGE_TYPE_3D && vk_format_get_blocksizebits(format) == 128 && vk_format_is_compressed(format) && (info->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) && - ((info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) || - (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))) { + ((info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) || (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))) { goto unsupported; } @@ -1443,10 +1402,8 @@ radv_get_image_format_properties(struct radv_physical_device *physical_device, * different format on GFX6. 
*/ if (physical_device->rad_info.gfx_level == GFX6 && info->type == VK_IMAGE_TYPE_1D && - vk_format_is_block_compressed(format) && - (info->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) && - ((info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) || - (info->usage & VK_IMAGE_USAGE_STORAGE_BIT))) { + vk_format_is_block_compressed(format) && (info->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) && + ((info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) || (info->usage & VK_IMAGE_USAGE_STORAGE_BIT))) { goto unsupported; } @@ -1497,8 +1454,8 @@ radv_get_image_format_properties(struct radv_physical_device *physical_device, } if (image_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) { - if (!(format_feature_flags & (VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT | - VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT))) { + if (!(format_feature_flags & + (VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT))) { goto unsupported; } } @@ -1519,8 +1476,7 @@ radv_get_image_format_properties(struct radv_physical_device *physical_device, goto unsupported; } - if ((info->flags & - (VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT | VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT)) && + if ((info->flags & (VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT | VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT)) && (desc->layout == UTIL_FORMAT_LAYOUT_ETC && physical_device->emulate_etc2)) { goto unsupported; } @@ -1582,8 +1538,7 @@ get_external_image_format_properties(struct radv_physical_device *physical_devic switch (pImageFormatInfo->type) { case VK_IMAGE_TYPE_2D: - flags = - VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; + flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; break; @@ -1594,8 +1549,7 @@ get_external_image_format_properties(struct radv_physical_device *physical_devic case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: switch (pImageFormatInfo->type) { case VK_IMAGE_TYPE_2D: - flags = - VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; + flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; if (pImageFormatInfo->tiling != VK_IMAGE_TILING_LINEAR) flags |= VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT; @@ -1616,8 +1570,7 @@ get_external_image_format_properties(struct radv_physical_device *physical_devic format_properties->maxArrayLayers = MIN2(1, format_properties->maxArrayLayers); format_properties->sampleCounts &= VK_SAMPLE_COUNT_1_BIT; - flags = VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT | - VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; + flags = VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; /* advertise EXPORTABLE only when radv_create_ahb_memory supports the format */ if (radv_android_gralloc_supports_format(pImageFormatInfo->format, pImageFormatInfo->usage)) @@ -1654,14 +1607,12 @@ radv_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice, VkResult result; VkFormat format = radv_select_android_external_format(base_info->pNext, base_info->format); - result = radv_get_image_format_properties(physical_device, base_info, format, - &base_props->imageFormatProperties); + result = radv_get_image_format_properties(physical_device, base_info, format, &base_props->imageFormatProperties); if (result != VK_SUCCESS) return result; /* Extract input structs */ - vk_foreach_struct_const(s, 
base_info->pNext) - { + vk_foreach_struct_const (s, base_info->pNext) { switch (s->sType) { case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO: external_info = (const void *)s; @@ -1672,8 +1623,7 @@ radv_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice, } /* Extract output structs */ - vk_foreach_struct(s, base_props->pNext) - { + vk_foreach_struct (s, base_props->pNext) { switch (s->sType) { case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES: external_props = (void *)s; @@ -1692,11 +1642,9 @@ radv_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice, } } - bool ahb_supported = - physical_device->vk.supported_extensions.ANDROID_external_memory_android_hardware_buffer; + bool ahb_supported = physical_device->vk.supported_extensions.ANDROID_external_memory_android_hardware_buffer; if (android_usage && ahb_supported) { - android_usage->androidHardwareBufferUsage = - vk_image_usage_to_ahb_usage(base_info->flags, base_info->usage); + android_usage->androidHardwareBufferUsage = vk_image_usage_to_ahb_usage(base_info->flags, base_info->usage); } /* From the Vulkan 1.0.97 spec: @@ -1725,8 +1673,7 @@ radv_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice, * VK_ERROR_FORMAT_NOT_SUPPORTED. */ result = vk_errorf(physical_device, VK_ERROR_FORMAT_NOT_SUPPORTED, - "unsupported VkExternalMemoryTypeFlagBitsKHR 0x%x", - external_info->handleType); + "unsupported VkExternalMemoryTypeFlagBitsKHR 0x%x", external_info->handleType); goto fail; } } @@ -1761,8 +1708,8 @@ fail: } static void -fill_sparse_image_format_properties(struct radv_physical_device *pdev, VkImageType type, - VkFormat format, VkSparseImageFormatProperties *prop) +fill_sparse_image_format_properties(struct radv_physical_device *pdev, VkImageType type, VkFormat format, + VkSparseImageFormatProperties *prop) { prop->aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; prop->flags = 0; @@ -1799,9 +1746,10 @@ fill_sparse_image_format_properties(struct radv_physical_device *pdev, VkImageTy } VKAPI_ATTR void VKAPI_CALL -radv_GetPhysicalDeviceSparseImageFormatProperties2( - VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo, - uint32_t *pPropertyCount, VkSparseImageFormatProperties2 *pProperties) +radv_GetPhysicalDeviceSparseImageFormatProperties2(VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo, + uint32_t *pPropertyCount, + VkSparseImageFormatProperties2 *pProperties) { RADV_FROM_HANDLE(radv_physical_device, pdev, physicalDevice); VkResult result; @@ -1830,14 +1778,12 @@ radv_GetPhysicalDeviceSparseImageFormatProperties2( vk_outarray_append_typed(VkSparseImageFormatProperties2, &out, prop) { - fill_sparse_image_format_properties(pdev, pFormatInfo->type, pFormatInfo->format, - &prop->properties); + fill_sparse_image_format_properties(pdev, pFormatInfo->type, pFormatInfo->format, &prop->properties); }; } VKAPI_ATTR void VKAPI_CALL -radv_GetImageSparseMemoryRequirements2(VkDevice _device, - const VkImageSparseMemoryRequirementsInfo2 *pInfo, +radv_GetImageSparseMemoryRequirements2(VkDevice _device, const VkImageSparseMemoryRequirementsInfo2 *pInfo, uint32_t *pSparseMemoryRequirementCount, VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements) { @@ -1854,8 +1800,7 @@ radv_GetImageSparseMemoryRequirements2(VkDevice _device, vk_outarray_append_typed(VkSparseImageMemoryRequirements2, &out, req) { - fill_sparse_image_format_properties(device->physical_device, image->vk.image_type, - image->vk.format, 
+ fill_sparse_image_format_properties(device->physical_device, image->vk.image_type, image->vk.format, &req->memoryRequirements.formatProperties); req->memoryRequirements.imageMipTailFirstLod = image->planes[0].surface.first_mip_tail_level; @@ -1864,18 +1809,15 @@ radv_GetImageSparseMemoryRequirements2(VkDevice _device, /* The tail is always a single tile per layer. */ req->memoryRequirements.imageMipTailSize = 65536; req->memoryRequirements.imageMipTailOffset = - image->planes[0] - .surface.u.gfx9.prt_level_offset[req->memoryRequirements.imageMipTailFirstLod] & - ~65535; - req->memoryRequirements.imageMipTailStride = - image->planes[0].surface.u.gfx9.surf_slice_size; + image->planes[0].surface.u.gfx9.prt_level_offset[req->memoryRequirements.imageMipTailFirstLod] & ~65535; + req->memoryRequirements.imageMipTailStride = image->planes[0].surface.u.gfx9.surf_slice_size; } else { req->memoryRequirements.imageMipTailOffset = (uint64_t)image->planes[0] .surface.u.legacy.level[req->memoryRequirements.imageMipTailFirstLod] - .offset_256B * 256; - req->memoryRequirements.imageMipTailSize = - image->size - req->memoryRequirements.imageMipTailOffset; + .offset_256B * + 256; + req->memoryRequirements.imageMipTailSize = image->size - req->memoryRequirements.imageMipTailOffset; req->memoryRequirements.imageMipTailStride = 0; } } else { @@ -1887,8 +1829,7 @@ radv_GetImageSparseMemoryRequirements2(VkDevice _device, } VKAPI_ATTR void VKAPI_CALL -radv_GetDeviceImageSparseMemoryRequirements(VkDevice device, - const VkDeviceImageMemoryRequirements* pInfo, +radv_GetDeviceImageSparseMemoryRequirements(VkDevice device, const VkDeviceImageMemoryRequirements *pInfo, uint32_t *pSparseMemoryRequirementCount, VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements) { @@ -1899,8 +1840,8 @@ radv_GetDeviceImageSparseMemoryRequirements(VkDevice device, * creating an image. * TODO: Avoid creating an image. 
*/ - result = radv_image_create( - device, &(struct radv_image_create_info){.vk_info = pInfo->pCreateInfo}, NULL, &image, true); + result = + radv_image_create(device, &(struct radv_image_create_info){.vk_info = pInfo->pCreateInfo}, NULL, &image, true); assert(result == VK_SUCCESS); VkImageSparseMemoryRequirementsInfo2 info2 = { @@ -1908,16 +1849,15 @@ radv_GetDeviceImageSparseMemoryRequirements(VkDevice device, .image = image, }; - radv_GetImageSparseMemoryRequirements2(device, &info2, pSparseMemoryRequirementCount, - pSparseMemoryRequirements); + radv_GetImageSparseMemoryRequirements2(device, &info2, pSparseMemoryRequirementCount, pSparseMemoryRequirements); radv_DestroyImage(device, image, NULL); } VKAPI_ATTR void VKAPI_CALL -radv_GetPhysicalDeviceExternalBufferProperties( - VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo, - VkExternalBufferProperties *pExternalBufferProperties) +radv_GetPhysicalDeviceExternalBufferProperties(VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo, + VkExternalBufferProperties *pExternalBufferProperties) { VkExternalMemoryFeatureFlagBits flags = 0; VkExternalMemoryHandleTypeFlags export_flags = 0; @@ -1926,8 +1866,8 @@ radv_GetPhysicalDeviceExternalBufferProperties( case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; - compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT | - VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + compat_flags = export_flags = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; break; case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; @@ -1954,8 +1894,7 @@ enum dcc_channel_type { /* Return the type of DCC encoding. */ static void -radv_get_dcc_channel_type(const struct util_format_description *desc, enum dcc_channel_type *type, - unsigned *size) +radv_get_dcc_channel_type(const struct util_format_description *desc, enum dcc_channel_type *type, unsigned *size) { int i = util_format_get_first_non_void_channel(desc->format); if (i == -1) { @@ -1984,8 +1923,7 @@ radv_get_dcc_channel_type(const struct util_format_description *desc, enum dcc_c /* Return if it's allowed to reinterpret one format as another with DCC enabled. */ bool -radv_dcc_formats_compatible(enum amd_gfx_level gfx_level, VkFormat format1, VkFormat format2, - bool *sign_reinterpret) +radv_dcc_formats_compatible(enum amd_gfx_level gfx_level, VkFormat format1, VkFormat format2, bool *sign_reinterpret) { const struct util_format_description *desc1, *desc2; enum dcc_channel_type type1, type2; diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 46438c6..79416c5 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -25,10 +25,10 @@ * IN THE SOFTWARE. 
*/ -#include "ac_drm_fourcc.h" -#include "util/u_debug.h" #include "util/u_atomic.h" +#include "util/u_debug.h" #include "vulkan/util/vk_format.h" +#include "ac_drm_fourcc.h" #include "radv_debug.h" #include "radv_private.h" #include "radv_radeon_winsys.h" @@ -40,8 +40,7 @@ #include "gfx10_format_table.h" static unsigned -radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, - VkFormat format) +radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format) { if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) { assert(pCreateInfo->samples <= 1); @@ -70,8 +69,7 @@ radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateI } static bool -radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, - VkFormat format) +radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format) { /* TC-compat HTILE is only available for GFX8+. */ if (device->physical_device->rad_info.gfx_level < GFX8) @@ -83,8 +81,8 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCrea /* Do not enable TC-compatible HTILE if the image isn't readable by a * shader because no texture fetches will happen. */ - if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT))) + if (!(pCreateInfo->usage & + (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT))) return false; if (device->physical_device->rad_info.gfx_level < GFX9) { @@ -99,8 +97,7 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCrea * the driver allows TC-compat HTILE for 16-bit depth surfaces * with no Z planes compression. */ - if (format != VK_FORMAT_D32_SFLOAT_S8_UINT && format != VK_FORMAT_D32_SFLOAT && - format != VK_FORMAT_D16_UNORM) + if (format != VK_FORMAT_D32_SFLOAT_S8_UINT && format != VK_FORMAT_D32_SFLOAT && format != VK_FORMAT_D16_UNORM) return false; } @@ -121,8 +118,7 @@ radv_surface_has_scanout(struct radv_device *device, const struct radv_image_cre } static bool -radv_image_use_fast_clear_for_image_early(const struct radv_device *device, - const struct radv_image *image) +radv_image_use_fast_clear_for_image_early(const struct radv_device *device, const struct radv_image *image) { if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS) return true; @@ -140,24 +136,22 @@ radv_image_use_fast_clear_for_image_early(const struct radv_device *device, } static bool -radv_image_use_fast_clear_for_image(const struct radv_device *device, - const struct radv_image *image) +radv_image_use_fast_clear_for_image(const struct radv_device *device, const struct radv_image *image) { if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS) return true; - return radv_image_use_fast_clear_for_image_early(device, image) && - (image->exclusive || - /* Enable DCC for concurrent images if stores are - * supported because that means we can keep DCC compressed on - * all layouts/queues. - */ - radv_image_use_dcc_image_stores(device, image)); + return radv_image_use_fast_clear_for_image_early(device, image) && (image->exclusive || + /* Enable DCC for concurrent images if stores are + * supported because that means we can keep DCC + * compressed on all layouts/queues. 
+ */ + radv_image_use_dcc_image_stores(device, image)); } bool -radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext, - VkFormat format, VkImageCreateFlags flags, bool *sign_reinterpret) +radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext, VkFormat format, + VkImageCreateFlags flags, bool *sign_reinterpret) { bool blendable; @@ -169,8 +163,7 @@ radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const v if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) { const struct VkImageFormatListCreateInfo *format_list = - (const struct VkImageFormatListCreateInfo *)vk_find_struct_const( - pNext, IMAGE_FORMAT_LIST_CREATE_INFO); + (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(pNext, IMAGE_FORMAT_LIST_CREATE_INFO); /* We have to ignore the existence of the list if viewFormatCount = 0 */ if (format_list && format_list->viewFormatCount) { @@ -180,8 +173,8 @@ radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const v if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED) continue; - if (!radv_dcc_formats_compatible(pdev->rad_info.gfx_level, format, - format_list->pViewFormats[i], sign_reinterpret)) + if (!radv_dcc_formats_compatible(pdev->rad_info.gfx_level, format, format_list->pViewFormats[i], + sign_reinterpret)) return false; } } else { @@ -202,16 +195,14 @@ radv_format_is_atomic_allowed(struct radv_device *device, VkFormat format) } static bool -radv_formats_is_atomic_allowed(struct radv_device *device, const void *pNext, VkFormat format, - VkImageCreateFlags flags) +radv_formats_is_atomic_allowed(struct radv_device *device, const void *pNext, VkFormat format, VkImageCreateFlags flags) { if (radv_format_is_atomic_allowed(device, format)) return true; if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) { const struct VkImageFormatListCreateInfo *format_list = - (const struct VkImageFormatListCreateInfo *)vk_find_struct_const( - pNext, IMAGE_FORMAT_LIST_CREATE_INFO); + (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(pNext, IMAGE_FORMAT_LIST_CREATE_INFO); /* We have to ignore the existence of the list if viewFormatCount = 0 */ if (format_list && format_list->viewFormatCount) { @@ -226,9 +217,8 @@ radv_formats_is_atomic_allowed(struct radv_device *device, const void *pNext, Vk } static bool -radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *image, - const VkImageCreateInfo *pCreateInfo, VkFormat format, - bool *sign_reinterpret) +radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *image, const VkImageCreateInfo *pCreateInfo, + VkFormat format, bool *sign_reinterpret) { /* DCC (Delta Color Compression) is only available for GFX8+. */ if (device->physical_device->rad_info.gfx_level < GFX8) @@ -285,8 +275,8 @@ radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *imag if (device->physical_device->rad_info.gfx_level == GFX11 && pCreateInfo->mipLevels > 1) return false; - return radv_are_formats_dcc_compatible(device->physical_device, pCreateInfo->pNext, format, - pCreateInfo->flags, sign_reinterpret); + return radv_are_formats_dcc_compatible(device->physical_device, pCreateInfo->pNext, format, pCreateInfo->flags, + sign_reinterpret); } static bool @@ -303,8 +293,7 @@ radv_use_dcc_for_image_late(struct radv_device *device, struct radv_image *image /* TODO: Fix storage images with DCC without DCC image stores. * Disabling it for now. 
*/ - if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && - !radv_image_use_dcc_image_stores(device, image)) + if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && !radv_image_use_dcc_image_stores(device, image)) return false; return true; @@ -323,8 +312,7 @@ radv_use_dcc_for_image_late(struct radv_device *device, struct radv_image *image bool radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image) { - return ac_surface_supports_dcc_image_stores(device->physical_device->rad_info.gfx_level, - &image->planes[0].surface); + return ac_surface_supports_dcc_image_stores(device->physical_device->rad_info.gfx_level, &image->planes[0].surface); } /* @@ -354,12 +342,11 @@ radv_use_htile_for_image(const struct radv_device *device, const struct radv_ima * - Investigate about mips+layers. * - Enable on other gens. */ - bool use_htile_for_mips = - image->vk.array_layers == 1 && device->physical_device->rad_info.gfx_level >= GFX10; + bool use_htile_for_mips = image->vk.array_layers == 1 && device->physical_device->rad_info.gfx_level >= GFX10; /* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */ - if (device->physical_device->rad_info.gfx_level == GFX10 && - image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT && image->vk.mip_levels > 1) + if (device->physical_device->rad_info.gfx_level == GFX10 && image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT && + image->vk.mip_levels > 1) return false; /* Do not enable HTILE for very small images because it seems less performant but make sure it's @@ -381,23 +368,21 @@ radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image return false; /* GFX9 has issues when sample count is greater than 2 */ - if (device->physical_device->rad_info.gfx_level == GFX9 && - image->vk.samples > 2) + if (device->physical_device->rad_info.gfx_level == GFX9 && image->vk.samples > 2) return false; if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK) return false; /* TC-compat CMASK with storage images is supported on GFX10+. */ - if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && - device->physical_device->rad_info.gfx_level < GFX10) + if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && device->physical_device->rad_info.gfx_level < GFX10) return false; /* Do not enable TC-compatible if the image isn't readable by a shader * because no texture fetches will happen. */ - if (!(image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT))) + if (!(image->vk.usage & + (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT))) return false; /* If the image doesn't have FMASK, it can't be fetchable. 
*/ @@ -457,8 +442,7 @@ radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf static VkResult radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image, - const struct radv_image_create_info *create_info, - struct ac_surf_info *image_info) + const struct radv_image_create_info *create_info, struct ac_surf_info *image_info) { unsigned width = image->vk.extent.width; unsigned height = image->vk.extent.height; @@ -470,8 +454,7 @@ radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image * * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/ */ - if (create_info->bo_metadata && - radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) { + if (create_info->bo_metadata && radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) { const struct radeon_bo_metadata *md = create_info->bo_metadata; if (device->physical_device->rad_info.gfx_level >= GFX10) { @@ -518,8 +501,7 @@ radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image static VkResult radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image, - const struct radv_image_create_info *create_info, - struct ac_surf_info *image_info) + const struct radv_image_create_info *create_info, struct ac_surf_info *image_info) { VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info); if (result != VK_SUCCESS) @@ -527,8 +509,7 @@ radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image * for (unsigned plane = 0; plane < image->plane_count; ++plane) { if (create_info->bo_metadata) { - radv_patch_surface_from_metadata(device, &image->planes[plane].surface, - create_info->bo_metadata); + radv_patch_surface_from_metadata(device, &image->planes[plane].surface, create_info->bo_metadata); } if (radv_surface_has_scanout(device, create_info)) { @@ -573,11 +554,9 @@ etc2_emulation_format(VkFormat format) } static VkFormat -radv_image_get_plane_format(const struct radv_physical_device *pdev, const struct radv_image *image, - unsigned plane) +radv_image_get_plane_format(const struct radv_physical_device *pdev, const struct radv_image *image, unsigned plane) { - if (pdev->emulate_etc2 && - vk_format_description(image->vk.format)->layout == UTIL_FORMAT_LAYOUT_ETC) { + if (pdev->emulate_etc2 && vk_format_description(image->vk.format)->layout == UTIL_FORMAT_LAYOUT_ETC) { if (plane == 0) return image->vk.format; return etc2_emulation_format(image->vk.format); @@ -640,8 +619,7 @@ radv_get_surface_flags(struct radv_device *device, struct radv_image *image, uns flags |= RADEON_SURF_NO_STENCIL_ADJUST; } - if (radv_use_htile_for_image(device, image) && - !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ) && + if (radv_use_htile_for_image(device, image) && !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ) && !(flags & RADEON_SURF_NO_RENDER_TARGET)) { if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format)) flags |= RADEON_SURF_TC_COMPATIBLE_HTILE; @@ -653,21 +631,18 @@ radv_get_surface_flags(struct radv_device *device, struct radv_image *image, uns if (is_stencil) flags |= RADEON_SURF_SBUFFER; - if (device->physical_device->rad_info.gfx_level >= GFX9 && - pCreateInfo->imageType == VK_IMAGE_TYPE_3D && + if (device->physical_device->rad_info.gfx_level >= GFX9 && pCreateInfo->imageType == VK_IMAGE_TYPE_3D && vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format)) flags |= 
RADEON_SURF_NO_RENDER_TARGET; - if (!radv_use_dcc_for_image_early(device, image, pCreateInfo, image_format, - &image->dcc_sign_reinterpret)) + if (!radv_use_dcc_for_image_early(device, image, pCreateInfo, image_format, &image->dcc_sign_reinterpret)) flags |= RADEON_SURF_DISABLE_DCC; if (!radv_use_fmask_for_image(device, image)) flags |= RADEON_SURF_NO_FMASK; if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) { - flags |= - RADEON_SURF_PRT | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE | RADEON_SURF_DISABLE_DCC; + flags |= RADEON_SURF_PRT | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE | RADEON_SURF_DISABLE_DCC; } /* Disable DCC for VRS rate images because the hw can't handle compression. */ @@ -729,8 +704,7 @@ radv_compose_swizzle(const struct util_format_description *desc, const VkCompone for (unsigned i = 0; i < 4; i++) swizzle[i] = desc->swizzle[i]; } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - const unsigned char swizzle_xxxx[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0, - PIPE_SWIZZLE_1}; + const unsigned char swizzle_xxxx[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0, PIPE_SWIZZLE_1}; vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle); } else { vk_format_compose_swizzles(mapping, desc->swizzle, swizzle); @@ -738,8 +712,8 @@ radv_compose_swizzle(const struct util_format_description *desc, const VkCompone } void -radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFormat vk_format, - unsigned offset, unsigned range, uint32_t *state) +radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFormat vk_format, unsigned offset, + unsigned range, uint32_t *state) { const struct util_format_description *desc; unsigned stride; @@ -760,13 +734,12 @@ radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFor range /= stride; } - rsrc_word3 = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | - S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) | - S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | - S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3])); + rsrc_word3 = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) | + S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3])); if (device->physical_device->rad_info.gfx_level >= GFX10) { - const struct gfx10_format *fmt = &ac_get_gfx10_format_table(&device->physical_device->rad_info)[vk_format_to_pipe_format(vk_format)]; + const struct gfx10_format *fmt = + &ac_get_gfx10_format_table(&device->physical_device->rad_info)[vk_format_to_pipe_format(vk_format)]; /* OOB_SELECT chooses the out-of-bounds check. 
* @@ -788,8 +761,7 @@ radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFor * else: * offset+payload > NUM_RECORDS */ - rsrc_word3 |= S_008F0C_FORMAT(fmt->img_format) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) | + rsrc_word3 |= S_008F0C_FORMAT(fmt->img_format) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) | S_008F0C_RESOURCE_LEVEL(device->physical_device->rad_info.gfx_level < GFX11); } else { num_format = radv_translate_buffer_numformat(desc, first_non_void); @@ -809,10 +781,9 @@ radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFor static void si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *image, - const struct legacy_surf_level *base_level_info, unsigned plane_id, - unsigned base_level, unsigned first_level, unsigned block_width, - bool is_stencil, bool is_storage_image, bool disable_compression, - bool enable_write_compression, uint32_t *state, + const struct legacy_surf_level *base_level_info, unsigned plane_id, unsigned base_level, + unsigned first_level, unsigned block_width, bool is_stencil, bool is_storage_image, + bool disable_compression, bool enable_write_compression, uint32_t *state, const struct ac_surf_nbc_view *nbc_view) { struct radv_image_plane *plane = &image->planes[plane_id]; @@ -868,10 +839,8 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *im * If an imported image is used with VK_IMAGE_VIEW_TYPE_2D_ARRAY, it may hang due to VM faults * because DEPTH means pitch with 2D, but it means depth with 2D array. */ - if (device->physical_device->rad_info.gfx_level >= GFX10_3 && - image->vk.image_type == VK_IMAGE_TYPE_2D && - plane->surface.is_linear && - util_is_power_of_two_nonzero(plane->surface.bpe) && + if (device->physical_device->rad_info.gfx_level >= GFX10_3 && image->vk.image_type == VK_IMAGE_TYPE_2D && + plane->surface.is_linear && util_is_power_of_two_nonzero(plane->surface.bpe) && G_00A00C_TYPE(state[3]) == V_008F1C_SQ_RSRC_IMG_2D) { assert((plane->surface.u.gfx9.surf_pitch * plane->surface.bpe) % 256 == 0); unsigned pitch = plane->surface.u.gfx9.surf_pitch; @@ -908,8 +877,7 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *im if (radv_dcc_enabled(image, first_level) && is_storage_image && enable_write_compression) state[6] |= S_00A018_WRITE_COMPRESS_ENABLE(1); - state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) | - S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8); + state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) | S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8); } state[7] = meta_va >> 16; @@ -925,8 +893,7 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *im state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.epitch); } - state[5] &= - C_008F24_META_DATA_ADDRESS & C_008F24_META_PIPE_ALIGNED & C_008F24_META_RB_ALIGNED; + state[5] &= C_008F24_META_DATA_ADDRESS & C_008F24_META_PIPE_ALIGNED & C_008F24_META_RB_ALIGNED; if (meta_va) { struct gfx9_surf_meta_flags meta = { .rb_aligned = 1, @@ -936,8 +903,7 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *im if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER)) meta = plane->surface.u.gfx9.color.dcc; - state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) | - S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) | + state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) | S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) | S_008F24_META_RB_ALIGNED(meta.rb_aligned); } } else { 
@@ -953,8 +919,8 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *im } static unsigned -radv_tex_dim(VkImageType image_type, VkImageViewType view_type, unsigned nr_layers, - unsigned nr_samples, bool is_storage_image, bool gfx9) +radv_tex_dim(VkImageType image_type, VkImageViewType view_type, unsigned nr_layers, unsigned nr_samples, + bool is_storage_image, bool gfx9) { if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE; @@ -1033,15 +999,12 @@ vi_alpha_is_on_msb(struct radv_device *device, VkFormat format) * Build the sampler view descriptor for a texture (GFX10). */ static void -gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *image, - bool is_storage_image, VkImageViewType view_type, VkFormat vk_format, - const VkComponentMapping *mapping, unsigned first_level, - unsigned last_level, unsigned first_layer, unsigned last_layer, - unsigned width, unsigned height, unsigned depth, float min_lod, - uint32_t *state, uint32_t *fmask_state, - VkImageCreateFlags img_create_flags, - const struct ac_surf_nbc_view *nbc_view, - const VkImageViewSlicedCreateInfoEXT *sliced_3d) +gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *image, bool is_storage_image, + VkImageViewType view_type, VkFormat vk_format, const VkComponentMapping *mapping, + unsigned first_level, unsigned last_level, unsigned first_layer, unsigned last_layer, + unsigned width, unsigned height, unsigned depth, float min_lod, uint32_t *state, + uint32_t *fmask_state, VkImageCreateFlags img_create_flags, + const struct ac_surf_nbc_view *nbc_view, const VkImageViewSlicedCreateInfoEXT *sliced_3d) { const struct util_format_description *desc; enum pipe_swizzle swizzle[4]; @@ -1053,15 +1016,14 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima /* For emulated ETC2 without alpha we need to override the format to a 3-componenent format, so * that border colors work correctly (alpha forced to 1). Since Vulkan has no such format, * this uses the Gallium formats to set the description. 
*/ - if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK && - vk_format == VK_FORMAT_R8G8B8A8_UNORM) { + if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK && vk_format == VK_FORMAT_R8G8B8A8_UNORM) { desc = util_format_description(PIPE_FORMAT_R8G8B8X8_UNORM); - } else if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK && - vk_format == VK_FORMAT_R8G8B8A8_SRGB) { + } else if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK && vk_format == VK_FORMAT_R8G8B8A8_SRGB) { desc = util_format_description(PIPE_FORMAT_R8G8B8X8_SRGB); } - img_format = ac_get_gfx10_format_table(&device->physical_device->rad_info)[vk_format_to_pipe_format(vk_format)].img_format; + img_format = + ac_get_gfx10_format_table(&device->physical_device->rad_info)[vk_format_to_pipe_format(vk_format)].img_format; radv_compose_swizzle(desc, mapping, swizzle); @@ -1069,8 +1031,8 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima assert(image->vk.image_type == VK_IMAGE_TYPE_3D); type = V_008F1C_SQ_RSRC_IMG_3D; } else { - type = radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, image->vk.samples, - is_storage_image, device->physical_device->rad_info.gfx_level == GFX9); + type = radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, image->vk.samples, is_storage_image, + device->physical_device->rad_info.gfx_level == GFX9); } if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { @@ -1083,25 +1045,20 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima depth = image->vk.array_layers / 6; state[0] = 0; - state[1] = S_00A004_FORMAT(img_format) | - S_00A004_WIDTH_LO(width - 1); + state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1); state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) | S_00A008_RESOURCE_LEVEL(device->physical_device->rad_info.gfx_level < GFX11); - state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | - S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) | - S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | - S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) | + state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) | + S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) | S_00A00C_BASE_LEVEL(image->vk.samples > 1 ? 0 : first_level) | - S_00A00C_LAST_LEVEL(image->vk.samples > 1 ? util_logbase2(image->vk.samples) - : last_level) | + S_00A00C_LAST_LEVEL(image->vk.samples > 1 ? util_logbase2(image->vk.samples) : last_level) | S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc)) | S_00A00C_TYPE(type); /* Depth is the the last accessible layer on gfx9+. The hw doesn't need * to know the total number of layers. */ - state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) | - S_00A010_BASE_ARRAY(first_layer); - state[5] = S_00A014_ARRAY_PITCH(0) | - S_00A014_PERF_MOD(4); + state[4] = + S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) | S_00A010_BASE_ARRAY(first_layer); + state[5] = S_00A014_ARRAY_PITCH(0) | S_00A014_PERF_MOD(4); state[6] = 0; state[7] = 0; @@ -1121,8 +1078,9 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima assert(type == V_008F1C_SQ_RSRC_IMG_3D && is_storage_image); unsigned first_slice = sliced_3d->sliceOffset; - unsigned slice_count = sliced_3d->sliceCount == VK_REMAINING_3D_SLICES_EXT ? 
- MAX2(1, total - sliced_3d->sliceOffset) : sliced_3d->sliceCount; + unsigned slice_count = sliced_3d->sliceCount == VK_REMAINING_3D_SLICES_EXT + ? MAX2(1, total - sliced_3d->sliceOffset) + : sliced_3d->sliceCount; unsigned last_slice = first_slice + slice_count - 1; state[4] = 0; @@ -1130,8 +1088,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima state[5] |= S_00A014_ARRAY_PITCH(1); } - unsigned max_mip = - image->vk.samples > 1 ? util_logbase2(image->vk.samples) : image->vk.mip_levels - 1; + unsigned max_mip = image->vk.samples > 1 ? util_logbase2(image->vk.samples) : image->vk.mip_levels - 1; if (nbc_view && nbc_view->valid) max_mip = nbc_view->num_levels - 1; @@ -1146,10 +1103,10 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima } if (radv_dcc_enabled(image, first_level)) { - state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) | - S_00A018_MAX_COMPRESSED_BLOCK_SIZE( - image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size) | - S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format)); + state[6] |= + S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) | + S_00A018_MAX_COMPRESSED_BLOCK_SIZE(image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size) | + S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format)); } if (radv_image_get_iterate256(device, image)) { @@ -1182,16 +1139,14 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima } fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle; - fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) | - S_00A004_WIDTH_LO(width - 1); - fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) | - S_00A008_RESOURCE_LEVEL(1); + fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) | S_00A004_WIDTH_LO(width - 1); + fmask_state[2] = + S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) | S_00A008_RESOURCE_LEVEL(1); fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) | S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode) | - S_00A00C_TYPE( - radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, 0, false, false)); + S_00A00C_TYPE(radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, 0, false, false)); fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer); fmask_state[5] = 0; fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1); @@ -1213,13 +1168,11 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima * Build the sampler view descriptor for a texture (SI-GFX9) */ static void -si_make_texture_descriptor(struct radv_device *device, struct radv_image *image, - bool is_storage_image, VkImageViewType view_type, VkFormat vk_format, - const VkComponentMapping *mapping, unsigned first_level, - unsigned last_level, unsigned first_layer, unsigned last_layer, - unsigned width, unsigned height, unsigned depth, float min_lod, - uint32_t *state, uint32_t *fmask_state, - VkImageCreateFlags img_create_flags) +si_make_texture_descriptor(struct radv_device *device, struct radv_image *image, bool is_storage_image, + VkImageViewType view_type, VkFormat vk_format, const VkComponentMapping *mapping, + unsigned first_level, unsigned last_level, unsigned first_layer, 
unsigned last_layer, + unsigned width, unsigned height, unsigned depth, float min_lod, uint32_t *state, + uint32_t *fmask_state, VkImageCreateFlags img_create_flags) { const struct util_format_description *desc; enum pipe_swizzle swizzle[4]; @@ -1231,11 +1184,9 @@ si_make_texture_descriptor(struct radv_device *device, struct radv_image *image, /* For emulated ETC2 without alpha we need to override the format to a 3-componenent format, so * that border colors work correctly (alpha forced to 1). Since Vulkan has no such format, * this uses the Gallium formats to set the description. */ - if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK && - vk_format == VK_FORMAT_R8G8B8A8_UNORM) { + if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK && vk_format == VK_FORMAT_R8G8B8A8_UNORM) { desc = util_format_description(PIPE_FORMAT_R8G8B8X8_UNORM); - } else if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK && - vk_format == VK_FORMAT_R8G8B8A8_SRGB) { + } else if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK && vk_format == VK_FORMAT_R8G8B8A8_SRGB) { desc = util_format_description(PIPE_FORMAT_R8G8B8X8_SRGB); } @@ -1267,8 +1218,8 @@ si_make_texture_descriptor(struct radv_device *device, struct radv_image *image, assert(image->vk.image_type == VK_IMAGE_TYPE_3D); type = V_008F1C_SQ_RSRC_IMG_3D; } else { - type = radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, image->vk.samples, - is_storage_image, device->physical_device->rad_info.gfx_level == GFX9); + type = radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, image->vk.samples, is_storage_image, + device->physical_device->rad_info.gfx_level == GFX9); } if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { @@ -1281,17 +1232,13 @@ si_make_texture_descriptor(struct radv_device *device, struct radv_image *image, depth = image->vk.array_layers / 6; state[0] = 0; - state[1] = (S_008F14_MIN_LOD(radv_float_to_ufixed(CLAMP(min_lod, 0, 15), 8)) | - S_008F14_DATA_FORMAT(data_format) | + state[1] = (S_008F14_MIN_LOD(radv_float_to_ufixed(CLAMP(min_lod, 0, 15), 8)) | S_008F14_DATA_FORMAT(data_format) | S_008F14_NUM_FORMAT(num_format)); state[2] = (S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1) | S_008F18_PERF_MOD(4)); - state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | - S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) | - S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | - S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) | + state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) | + S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) | S_008F1C_BASE_LEVEL(image->vk.samples > 1 ? 0 : first_level) | - S_008F1C_LAST_LEVEL(image->vk.samples > 1 ? util_logbase2(image->vk.samples) - : last_level) | + S_008F1C_LAST_LEVEL(image->vk.samples > 1 ? util_logbase2(image->vk.samples) : last_level) | S_008F1C_TYPE(type)); state[4] = 0; state[5] = S_008F24_BASE_ARRAY(first_layer); @@ -1310,15 +1257,13 @@ si_make_texture_descriptor(struct radv_device *device, struct radv_image *image, state[4] |= S_008F20_DEPTH(last_layer); state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle); - state[5] |= S_008F24_MAX_MIP(image->vk.samples > 1 ? util_logbase2(image->vk.samples) - : image->vk.mip_levels - 1); + state[5] |= S_008F24_MAX_MIP(image->vk.samples > 1 ? 
util_logbase2(image->vk.samples) : image->vk.mip_levels - 1); } else { state[3] |= S_008F1C_POW2_PAD(image->vk.mip_levels > 1); state[4] |= S_008F20_DEPTH(depth - 1); state[5] |= S_008F24_LAST_ARRAY(last_layer); } - if (!(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) && - image->planes[0].surface.meta_offset) { + if (!(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) && image->planes[0].surface.meta_offset) { state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format)); } else { if (device->instance->disable_aniso_single_level) { @@ -1380,14 +1325,13 @@ si_make_texture_descriptor(struct radv_device *device, struct radv_image *image, fmask_state[0] = va >> 8; fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle; - fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(fmask_format) | - S_008F14_NUM_FORMAT(num_format); + fmask_state[1] = + S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(fmask_format) | S_008F14_NUM_FORMAT(num_format); fmask_state[2] = S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1); fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | - S_008F1C_TYPE( - radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, 0, false, false)); + S_008F1C_TYPE(radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, 0, false, false)); fmask_state[4] = 0; fmask_state[5] = S_008F24_BASE_ARRAY(first_layer); fmask_state[6] = 0; @@ -1395,8 +1339,8 @@ si_make_texture_descriptor(struct radv_device *device, struct radv_image *image, if (device->physical_device->rad_info.gfx_level == GFX9) { fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode); - fmask_state[4] |= S_008F20_DEPTH(last_layer) | - S_008F20_PITCH(image->planes[0].surface.u.gfx9.color.fmask_epitch); + fmask_state[4] |= + S_008F20_DEPTH(last_layer) | S_008F20_PITCH(image->planes[0].surface.u.gfx9.color.fmask_epitch); fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) | S_008F24_META_RB_ALIGNED(1); if (radv_image_is_tc_compat_cmask(image)) { @@ -1407,11 +1351,9 @@ si_make_texture_descriptor(struct radv_device *device, struct radv_image *image, fmask_state[7] |= va >> 8; } } else { - fmask_state[3] |= - S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.color.fmask.tiling_index); - fmask_state[4] |= - S_008F20_DEPTH(depth - 1) | - S_008F20_PITCH(image->planes[0].surface.u.legacy.color.fmask.pitch_in_pixels - 1); + fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.color.fmask.tiling_index); + fmask_state[4] |= S_008F20_DEPTH(depth - 1) | + S_008F20_PITCH(image->planes[0].surface.u.legacy.color.fmask.pitch_in_pixels - 1); fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer); if (radv_image_is_tc_compat_cmask(image)) { @@ -1427,53 +1369,47 @@ si_make_texture_descriptor(struct radv_device *device, struct radv_image *image, } static void -radv_make_texture_descriptor(struct radv_device *device, struct radv_image *image, - bool is_storage_image, VkImageViewType view_type, VkFormat vk_format, - const VkComponentMapping *mapping, unsigned first_level, - unsigned last_level, unsigned first_layer, unsigned last_layer, +radv_make_texture_descriptor(struct radv_device *device, struct radv_image *image, bool is_storage_image, + VkImageViewType view_type, VkFormat vk_format, const VkComponentMapping *mapping, + unsigned first_level, unsigned last_level, unsigned 
first_layer, unsigned last_layer, unsigned width, unsigned height, unsigned depth, float min_lod, uint32_t *state, uint32_t *fmask_state, VkImageCreateFlags img_create_flags, - const struct ac_surf_nbc_view *nbc_view, - const VkImageViewSlicedCreateInfoEXT *sliced_3d) + const struct ac_surf_nbc_view *nbc_view, const VkImageViewSlicedCreateInfoEXT *sliced_3d) { if (device->physical_device->rad_info.gfx_level >= GFX10) { - gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping, - first_level, last_level, first_layer, last_layer, width, height, - depth, min_lod, state, fmask_state, img_create_flags, nbc_view, - sliced_3d); + gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping, first_level, + last_level, first_layer, last_layer, width, height, depth, min_lod, state, + fmask_state, img_create_flags, nbc_view, sliced_3d); } else { - si_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping, - first_level, last_level, first_layer, last_layer, width, height, - depth, min_lod, state, fmask_state, img_create_flags); + si_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping, first_level, + last_level, first_layer, last_layer, width, height, depth, min_lod, state, fmask_state, + img_create_flags); } } static void -radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image, - struct radeon_bo_metadata *md) +radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image, struct radeon_bo_metadata *md) { static const VkComponentMapping fixedmapping; uint32_t desc[8]; assert(image->plane_count == 1); - radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->vk.image_type, - image->vk.format, &fixedmapping, 0, image->vk.mip_levels - 1, 0, - image->vk.array_layers - 1, image->vk.extent.width, image->vk.extent.height, - image->vk.extent.depth, 0.0f, desc, NULL, 0, NULL, NULL); + radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->vk.image_type, image->vk.format, + &fixedmapping, 0, image->vk.mip_levels - 1, 0, image->vk.array_layers - 1, + image->vk.extent.width, image->vk.extent.height, image->vk.extent.depth, 0.0f, desc, + NULL, 0, NULL, NULL); - si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, - 0, image->planes[0].surface.blk_w, false, false, false, false, - desc, NULL); + si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0, + image->planes[0].surface.blk_w, false, false, false, false, desc, NULL); - ac_surface_compute_umd_metadata(&device->physical_device->rad_info, &image->planes[0].surface, - image->vk.mip_levels, desc, &md->size_metadata, md->metadata, + ac_surface_compute_umd_metadata(&device->physical_device->rad_info, &image->planes[0].surface, image->vk.mip_levels, + desc, &md->size_metadata, md->metadata, device->instance->debug_flags & RADV_DEBUG_EXTRA_MD); } void -radv_init_metadata(struct radv_device *device, struct radv_image *image, - struct radeon_bo_metadata *metadata) +radv_init_metadata(struct radv_device *device, struct radv_image *image, struct radeon_bo_metadata *metadata) { struct radeon_surf *surface = &image->planes[0].surface; @@ -1481,23 +1417,19 @@ radv_init_metadata(struct radv_device *device, struct radv_image *image, if (device->physical_device->rad_info.gfx_level >= GFX9) { uint64_t dcc_offset = - image->bindings[0].offset + - (surface->display_dcc_offset ? 
surface->display_dcc_offset : surface->meta_offset); + image->bindings[0].offset + (surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset); metadata->u.gfx9.swizzle_mode = surface->u.gfx9.swizzle_mode; metadata->u.gfx9.dcc_offset_256b = dcc_offset >> 8; metadata->u.gfx9.dcc_pitch_max = surface->u.gfx9.color.display_dcc_pitch_max; metadata->u.gfx9.dcc_independent_64b_blocks = surface->u.gfx9.color.dcc.independent_64B_blocks; metadata->u.gfx9.dcc_independent_128b_blocks = surface->u.gfx9.color.dcc.independent_128B_blocks; - metadata->u.gfx9.dcc_max_compressed_block_size = - surface->u.gfx9.color.dcc.max_compressed_block_size; + metadata->u.gfx9.dcc_max_compressed_block_size = surface->u.gfx9.color.dcc.max_compressed_block_size; metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0; } else { - metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D - ? RADEON_LAYOUT_TILED - : RADEON_LAYOUT_LINEAR; - metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D - ? RADEON_LAYOUT_TILED - : RADEON_LAYOUT_LINEAR; + metadata->u.legacy.microtile = + surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ? RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; + metadata->u.legacy.macrotile = + surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ? RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config; metadata->u.legacy.bankw = surface->u.legacy.bankw; metadata->u.legacy.bankh = surface->u.legacy.bankh; @@ -1511,20 +1443,19 @@ radv_init_metadata(struct radv_device *device, struct radv_image *image, } void -radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image, - uint64_t offset, uint32_t stride) +radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image, uint64_t offset, + uint32_t stride) { ac_surface_override_offset_stride(&device->physical_device->rad_info, &image->planes[0].surface, image->vk.array_layers, image->vk.mip_levels, offset, stride); } static void -radv_image_alloc_single_sample_cmask(const struct radv_device *device, - const struct radv_image *image, struct radeon_surf *surf) +radv_image_alloc_single_sample_cmask(const struct radv_device *device, const struct radv_image *image, + struct radeon_surf *surf) { if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 || image->vk.mip_levels > 1 || - image->vk.extent.depth > 1 || radv_image_has_dcc(image) || - !radv_image_use_fast_clear_for_image(device, image) || + image->vk.extent.depth > 1 || radv_image_has_dcc(image) || !radv_image_use_fast_clear_for_image(device, image) || (image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)) return; @@ -1552,14 +1483,13 @@ radv_image_alloc_values(const struct radv_device *device, struct radv_image *ima image->size += 8 * image->vk.mip_levels; } - if ((radv_image_has_dcc(image) && !image->support_comp_to_single) || - radv_image_has_cmask(image) || radv_image_has_htile(image)) { + if ((radv_image_has_dcc(image) && !image->support_comp_to_single) || radv_image_has_cmask(image) || + radv_image_has_htile(image)) { image->clear_value_offset = image->size; image->size += 8 * image->vk.mip_levels; } - if (radv_image_is_tc_compat_htile(image) && - device->physical_device->rad_info.has_tc_compat_zrange_bug) { + if (radv_image_is_tc_compat_htile(image) && device->physical_device->rad_info.has_tc_compat_zrange_bug) { /* Metadata for the TC-compatible HTILE hardware bug which * have to be fixed by 
updating ZRANGE_PRECISION when doing * fast depth clears to 0.0f. @@ -1624,12 +1554,10 @@ static bool radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image) { if (device->physical_device->rad_info.gfx_level >= GFX10) { - return !device->physical_device->rad_info.tcc_rb_non_coherent && - !radv_image_is_pipe_misaligned(device, image); + return !device->physical_device->rad_info.tcc_rb_non_coherent && !radv_image_is_pipe_misaligned(device, image); } else if (device->physical_device->rad_info.gfx_level == GFX9) { if (image->vk.samples == 1 && - (image->vk.usage & - (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) && + (image->vk.usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) && !vk_format_has_stencil(image->vk.format)) { /* Single-sample color and single-sample depth * (not stencil) are coherent with shaders on @@ -1758,8 +1686,7 @@ radv_get_ac_surf_info(struct radv_device *device, const struct radv_image *image info.num_channels = vk_format_get_nr_components(image->vk.format); if (!vk_format_is_depth_or_stencil(image->vk.format) && !image->shareable && - !(image->vk.create_flags & (VK_IMAGE_CREATE_SPARSE_ALIASED_BIT | - VK_IMAGE_CREATE_ALIAS_BIT)) && + !(image->vk.create_flags & (VK_IMAGE_CREATE_SPARSE_ALIASED_BIT | VK_IMAGE_CREATE_ALIAS_BIT)) && image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) { info.surf_index = &device->image_mrt_offset_counter; } @@ -1770,8 +1697,7 @@ radv_get_ac_surf_info(struct radv_device *device, const struct radv_image *image VkResult radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info, const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info, - const struct VkVideoProfileListInfoKHR *profile_list, - struct radv_image *image) + const struct VkVideoProfileListInfoKHR *profile_list, struct radv_image *image) { /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the * common internal case. 
*/ @@ -1809,8 +1735,7 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in info.height = vk_format_get_plane_height(image->vk.format, plane, info.height); if (create_info.no_metadata_planes || plane_count > 1) { - image->planes[plane].surface.flags |= - RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE; + image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE; } device->ws->surface_init(device->ws, &info, &image->planes[plane].surface); @@ -1821,14 +1746,12 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in } if (create_info.bo_metadata && !mod_info && - !ac_surface_apply_umd_metadata(&device->physical_device->rad_info, - &image->planes[plane].surface, image->vk.samples, - image->vk.mip_levels, create_info.bo_metadata->size_metadata, - create_info.bo_metadata->metadata)) + !ac_surface_apply_umd_metadata(&device->physical_device->rad_info, &image->planes[plane].surface, + image->vk.samples, image->vk.mip_levels, + create_info.bo_metadata->size_metadata, create_info.bo_metadata->metadata)) return VK_ERROR_INVALID_EXTERNAL_HANDLE; - if (!create_info.no_metadata_planes && !create_info.bo_metadata && plane_count == 1 && - !mod_info) + if (!create_info.no_metadata_planes && !create_info.bo_metadata && plane_count == 1 && !mod_info) radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface); if (mod_info) { @@ -1839,15 +1762,12 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in offset = mod_info->pPlaneLayouts[plane].offset; stride = mod_info->pPlaneLayouts[plane].rowPitch / image->planes[plane].surface.bpe; } else { - offset = image->disjoint ? 0 : - align64(image->size, 1ull << image->planes[plane].surface.alignment_log2); + offset = image->disjoint ? 0 : align64(image->size, 1ull << image->planes[plane].surface.alignment_log2); stride = 0; /* 0 means no override */ } - if (!ac_surface_override_offset_stride(&device->physical_device->rad_info, - &image->planes[plane].surface, - image->vk.array_layers, image->vk.mip_levels, - offset, stride)) + if (!ac_surface_override_offset_stride(&device->physical_device->rad_info, &image->planes[plane].surface, + image->vk.array_layers, image->vk.mip_levels, offset, stride)) return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT; /* Validate DCC offsets in modifier layout. 
*/ @@ -1857,9 +1777,8 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT; for (unsigned i = 1; i < mem_planes; ++i) { - if (ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level, - &image->planes[plane].surface, i, - 0) != mod_info->pPlaneLayouts[i].offset) + if (ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level, &image->planes[plane].surface, + i, 0) != mod_info->pPlaneLayouts[i].offset) return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT; } } @@ -1867,12 +1786,10 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size); image->alignment = MAX2(image->alignment, 1 << image->planes[plane].surface.alignment_log2); - image->planes[plane].format = - radv_image_get_plane_format(device->physical_device, image, plane); + image->planes[plane].format = radv_image_get_plane_format(device->physical_device, image, plane); } - image->tc_compatible_cmask = - radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image); + image->tc_compatible_cmask = radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image); image->l2_coherent = radv_image_is_l2_coherent(device, image); @@ -1887,8 +1804,7 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in } static void -radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAllocator, - struct radv_image *image) +radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAllocator, struct radv_image *image) { if ((image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bindings[0].bo) { radv_rmv_log_bo_destroy(device, image->bindings[0].bo); @@ -1919,8 +1835,7 @@ radv_image_print_info(struct radv_device *device, struct radv_image *image) const struct radv_image_plane *plane = &image->planes[i]; const struct radeon_surf *surf = &plane->surface; const struct util_format_description *desc = vk_format_description(plane->format); - uint64_t offset = ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level, - &plane->surface, 0, 0); + uint64_t offset = ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level, &plane->surface, 0, 0); fprintf(stderr, " Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset); @@ -1944,8 +1859,7 @@ radv_select_modifier(const struct radv_device *dev, VkFormat format, .dcc_retile = true, }; - ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format), - &mod_count, NULL); + ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format), &mod_count, NULL); uint64_t *mods = calloc(mod_count, sizeof(*mods)); @@ -1953,8 +1867,7 @@ radv_select_modifier(const struct radv_device *dev, VkFormat format, if (!mods) return mod_list->pDrmFormatModifiers[0]; - ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format), - &mod_count, mods); + ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format), &mod_count, mods); for (unsigned i = 0; i < mod_count; ++i) { for (uint32_t j = 0; j < mod_list->drmFormatModifierCount; ++j) { @@ -1995,8 +1908,7 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_ radv_assert(pCreateInfo->extent.height > 0); 
radv_assert(pCreateInfo->extent.depth > 0); - image = - vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + image = vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!image) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -2012,8 +1924,8 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_ pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT) image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u; else - image->queue_family_mask |= 1u << vk_queue_to_radv(device->physical_device, - pCreateInfo->pQueueFamilyIndices[i]); + image->queue_family_mask |= + 1u << vk_queue_to_radv(device->physical_device, pCreateInfo->pQueueFamilyIndices[i]); } const VkExternalMemoryImageCreateInfo *external_info = @@ -2027,13 +1939,11 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_ modifier = explicit_mod->drmFormatModifier; for (unsigned plane = 0; plane < plane_count; ++plane) { - image->planes[plane].surface.flags = - radv_get_surface_flags(device, image, plane, pCreateInfo, format); + image->planes[plane].surface.flags = radv_get_surface_flags(device, image, plane, pCreateInfo, format); image->planes[plane].surface.modifier = modifier; } - if (image->vk.external_handle_types & - VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID) { + if (image->vk.external_handle_types & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID) { #ifdef ANDROID image->vk.ahb_format = radv_ahb_format_for_vk_format(image->vk.format); #endif @@ -2054,10 +1964,8 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_ image->size = align64(image->size, image->alignment); image->bindings[0].offset = 0; - result = - device->ws->buffer_create(device->ws, image->size, image->alignment, 0, - RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, 0, - &image->bindings[0].bo); + result = device->ws->buffer_create(device->ws, image->size, image->alignment, 0, RADEON_FLAG_VIRTUAL, + RADV_BO_PRIORITY_VIRTUAL, 0, &image->bindings[0].bo); if (result != VK_SUCCESS) { radv_destroy_image(device, alloc, image); return vk_error(device, result); @@ -2078,8 +1986,7 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_ } static inline void -compute_non_block_compressed_view(struct radv_device *device, - const struct radv_image_view *iview, +compute_non_block_compressed_view(struct radv_device *device, const struct radv_image_view *iview, struct ac_surf_nbc_view *nbc_view) { const struct radv_image *image = iview->image; @@ -2087,16 +1994,14 @@ compute_non_block_compressed_view(struct radv_device *device, struct ac_addrlib *addrlib = device->ws->get_addrlib(device->ws); struct ac_surf_info surf_info = radv_get_ac_surf_info(device, image); - ac_surface_compute_nbc_view(addrlib, &device->physical_device->rad_info, surf, &surf_info, - iview->vk.base_mip_level, iview->vk.base_array_layer, nbc_view); + ac_surface_compute_nbc_view(addrlib, &device->physical_device->rad_info, surf, &surf_info, iview->vk.base_mip_level, + iview->vk.base_array_layer, nbc_view); } static void -radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_device *device, - VkFormat vk_format, const VkComponentMapping *components, - float min_lod, - bool is_storage_image, bool disable_compression, - bool enable_compression, unsigned plane_id, +radv_image_view_make_descriptor(struct radv_image_view 
*iview, struct radv_device *device, VkFormat vk_format, + const VkComponentMapping *components, float min_lod, bool is_storage_image, + bool disable_compression, bool enable_compression, unsigned plane_id, unsigned descriptor_plane_id, VkImageCreateFlags img_create_flags, const struct ac_surf_nbc_view *nbc_view, const VkImageViewSlicedCreateInfoEXT *sliced_3d) @@ -2117,8 +2022,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic assert(vk_format_get_plane_count(vk_format) == 1); assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0); - blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * - vk_format_get_blockwidth(vk_format); + blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format); if (device->physical_device->rad_info.gfx_level >= GFX9) { hw_level = iview->vk.base_mip_level; @@ -2132,15 +2036,14 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic } } - radv_make_texture_descriptor( - device, image, is_storage_image, iview->vk.view_type, vk_format, components, hw_level, - hw_level + iview->vk.level_count - 1, first_layer, - iview->vk.base_array_layer + iview->vk.layer_count - 1, - vk_format_get_plane_width(image->vk.format, plane_id, iview->extent.width), - vk_format_get_plane_height(image->vk.format, plane_id, iview->extent.height), - iview->extent.depth, min_lod, descriptor->plane_descriptors[descriptor_plane_id], - descriptor_plane_id || is_storage_image ? NULL : descriptor->fmask_descriptor, - img_create_flags, nbc_view, sliced_3d); + radv_make_texture_descriptor(device, image, is_storage_image, iview->vk.view_type, vk_format, components, hw_level, + hw_level + iview->vk.level_count - 1, first_layer, + iview->vk.base_array_layer + iview->vk.layer_count - 1, + vk_format_get_plane_width(image->vk.format, plane_id, iview->extent.width), + vk_format_get_plane_height(image->vk.format, plane_id, iview->extent.height), + iview->extent.depth, min_lod, descriptor->plane_descriptors[descriptor_plane_id], + descriptor_plane_id || is_storage_image ? NULL : descriptor->fmask_descriptor, + img_create_flags, nbc_view, sliced_3d); const struct legacy_surf_level *base_level_info = NULL; if (device->physical_device->rad_info.gfx_level <= GFX9) { @@ -2154,9 +2057,9 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic if (is_storage_image && !(enable_write_compression || enable_compression)) disable_compression = true; si_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, iview->vk.base_mip_level, - iview->vk.base_mip_level, blk_w, is_stencil, is_storage_image, - disable_compression, enable_write_compression, - descriptor->plane_descriptors[descriptor_plane_id], nbc_view); + iview->vk.base_mip_level, blk_w, is_stencil, is_storage_image, disable_compression, + enable_write_compression, descriptor->plane_descriptors[descriptor_plane_id], + nbc_view); } static unsigned @@ -2201,8 +2104,7 @@ radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask) * Determine if the given image view can be fast cleared. 
*/ static bool -radv_image_view_can_fast_clear(const struct radv_device *device, - const struct radv_image_view *iview) +radv_image_view_can_fast_clear(const struct radv_device *device, const struct radv_image_view *iview) { struct radv_image *image; @@ -2227,8 +2129,7 @@ radv_image_view_can_fast_clear(const struct radv_device *device, void radv_image_view_init(struct radv_image_view *iview, struct radv_device *device, - const VkImageViewCreateInfo *pCreateInfo, - VkImageCreateFlags img_create_flags, + const VkImageViewCreateInfo *pCreateInfo, VkImageCreateFlags img_create_flags, const struct radv_image_view_extra_create_info *extra_create_info) { RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image); @@ -2251,8 +2152,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device, switch (image->vk.image_type) { case VK_IMAGE_TYPE_1D: case VK_IMAGE_TYPE_2D: - assert(range->baseArrayLayer + vk_image_subresource_layer_count(&image->vk, range) - 1 <= - image->vk.array_layers); + assert(range->baseArrayLayer + vk_image_subresource_layer_count(&image->vk, range) - 1 <= image->vk.array_layers); break; case VK_IMAGE_TYPE_3D: assert(range->baseArrayLayer + vk_image_subresource_layer_count(&image->vk, range) - 1 <= @@ -2345,8 +2245,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device, * block compatible format and the compressed format, so even if we take * the plain converted dimensions the physical layout is correct. */ - if (device->physical_device->rad_info.gfx_level >= GFX9 && - vk_format_is_block_compressed(image->vk.format) && + if (device->physical_device->rad_info.gfx_level >= GFX9 && vk_format_is_block_compressed(image->vk.format) && !vk_format_is_block_compressed(iview->vk.format)) { /* If we have multiple levels in the view we should ideally take the last level, * but the mip calculation has a max(..., 1) so walking back to the base mip in an @@ -2389,11 +2288,11 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device, for (unsigned i = 0; i < plane_count; ++i) { VkFormat format = vk_format_get_plane_format(iview->vk.view_format, i); radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, min_lod, false, - disable_compression, enable_compression, iview->plane_id + i, - i, img_create_flags, &iview->nbc_view, NULL); + disable_compression, enable_compression, iview->plane_id + i, i, img_create_flags, + &iview->nbc_view, NULL); radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, min_lod, true, - disable_compression, enable_compression, iview->plane_id + i, - i, img_create_flags, &iview->nbc_view, sliced_3d); + disable_compression, enable_compression, iview->plane_id + i, i, img_create_flags, + &iview->nbc_view, sliced_3d); } } @@ -2404,8 +2303,8 @@ radv_image_view_finish(struct radv_image_view *iview) } bool -radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image, - VkImageLayout layout, unsigned queue_mask) +radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image, VkImageLayout layout, + unsigned queue_mask) { switch (layout) { case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: @@ -2440,8 +2339,7 @@ radv_layout_is_htile_compressed(const struct radv_device *device, const struct r case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL: if (radv_image_is_tc_compat_htile(image) || (radv_image_has_htile(image) && - !(image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT | - 
VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))) { + !(image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))) { /* Keep HTILE compressed if the image is only going to * be used as a depth/stencil read-only attachment. */ @@ -2456,19 +2354,16 @@ radv_layout_is_htile_compressed(const struct radv_device *device, const struct r } bool -radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image, - unsigned level, VkImageLayout layout, - unsigned queue_mask) +radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image, unsigned level, + VkImageLayout layout, unsigned queue_mask) { - if (radv_dcc_enabled(image, level) && - !radv_layout_dcc_compressed(device, image, level, layout, queue_mask)) + if (radv_dcc_enabled(image, level) && !radv_layout_dcc_compressed(device, image, level, layout, queue_mask)) return false; if (!(image->vk.usage & RADV_IMAGE_USAGE_WRITE_BITS)) return false; - if (layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && - layout != VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL) + if (layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && layout != VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL) return false; /* Exclusive images with CMASK or DCC can always be fast-cleared on the gfx queue. Concurrent @@ -2479,14 +2374,13 @@ radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_i } bool -radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image, - unsigned level, VkImageLayout layout, unsigned queue_mask) +radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image, unsigned level, + VkImageLayout layout, unsigned queue_mask) { if (!radv_dcc_enabled(image, level)) return false; - if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && - queue_mask & (1u << RADV_QUEUE_FOREIGN)) + if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && queue_mask & (1u << RADV_QUEUE_FOREIGN)) return true; /* If the image is read-only, we can always just keep it compressed */ @@ -2509,8 +2403,8 @@ radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_i } enum radv_fmask_compression -radv_layout_fmask_compression(const struct radv_device *device, const struct radv_image *image, - VkImageLayout layout, unsigned queue_mask) +radv_layout_fmask_compression(const struct radv_device *device, const struct radv_image *image, VkImageLayout layout, + unsigned queue_mask) { if (!radv_image_has_fmask(image)) return RADV_FMASK_COMPRESSION_NONE; @@ -2538,14 +2432,12 @@ radv_layout_fmask_compression(const struct radv_device *device, const struct rad return RADV_FMASK_COMPRESSION_NONE; default: /* Don't compress images that are concurrent. */ - return queue_mask == (1u << RADV_QUEUE_GENERAL) ? - RADV_FMASK_COMPRESSION_FULL : RADV_FMASK_COMPRESSION_NONE; + return queue_mask == (1u << RADV_QUEUE_GENERAL) ? 
RADV_FMASK_COMPRESSION_FULL : RADV_FMASK_COMPRESSION_NONE; } } unsigned -radv_image_queue_family_mask(const struct radv_image *image, - enum radv_queue_family family, +radv_image_queue_family_mask(const struct radv_image *image, enum radv_queue_family family, enum radv_queue_family queue_family) { if (!image->exclusive) @@ -2558,12 +2450,11 @@ radv_image_queue_family_mask(const struct radv_image *image, } VKAPI_ATTR VkResult VKAPI_CALL -radv_CreateImage(VkDevice _device, const VkImageCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, VkImage *pImage) +radv_CreateImage(VkDevice _device, const VkImageCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, + VkImage *pImage) { #ifdef ANDROID - const VkNativeBufferANDROID *gralloc_info = - vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID); + const VkNativeBufferANDROID *gralloc_info = vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID); if (gralloc_info) return radv_image_from_gralloc(_device, pCreateInfo, gralloc_info, pAllocator, pImage); @@ -2577,15 +2468,12 @@ radv_CreateImage(VkDevice _device, const VkImageCreateInfo *pCreateInfo, const VkImageSwapchainCreateInfoKHR *swapchain_info = vk_find_struct_const(pCreateInfo->pNext, IMAGE_SWAPCHAIN_CREATE_INFO_KHR); if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) { - return wsi_common_create_swapchain_image(device->physical_device->vk.wsi_device, - pCreateInfo, - swapchain_info->swapchain, - pImage); + return wsi_common_create_swapchain_image(device->physical_device->vk.wsi_device, pCreateInfo, + swapchain_info->swapchain, pImage); } #endif - const struct wsi_image_create_info *wsi_info = - vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA); + const struct wsi_image_create_info *wsi_info = vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA); bool scanout = wsi_info && wsi_info->scanout; bool prime_blit_src = wsi_info && wsi_info->blit_src; @@ -2611,8 +2499,8 @@ radv_DestroyImage(VkDevice _device, VkImage _image, const VkAllocationCallbacks } VKAPI_ATTR void VKAPI_CALL -radv_GetImageSubresourceLayout(VkDevice _device, VkImage _image, - const VkImageSubresource *pSubresource, VkSubresourceLayout *pLayout) +radv_GetImageSubresourceLayout(VkDevice _device, VkImage _image, const VkImageSubresource *pSubresource, + VkSubresourceLayout *pLayout) { RADV_FROM_HANDLE(radv_image, image, _image); RADV_FROM_HANDLE(radv_device, device, _device); @@ -2632,21 +2520,20 @@ radv_GetImageSubresourceLayout(VkDevice _device, VkImage _image, assert(level == 0); assert(layer == 0); - pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level, - surface, mem_plane_id, 0); - pLayout->rowPitch = ac_surface_get_plane_stride(device->physical_device->rad_info.gfx_level, - surface, mem_plane_id, level); + pLayout->offset = + ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level, surface, mem_plane_id, 0); + pLayout->rowPitch = + ac_surface_get_plane_stride(device->physical_device->rad_info.gfx_level, surface, mem_plane_id, level); pLayout->arrayPitch = 0; pLayout->depthPitch = 0; pLayout->size = ac_surface_get_plane_size(surface, mem_plane_id); } else if (device->physical_device->rad_info.gfx_level >= GFX9) { uint64_t level_offset = surface->is_linear ? 
surface->u.gfx9.offset[level] : 0; - pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level, - &plane->surface, 0, layer) + - level_offset; - if (image->vk.format == VK_FORMAT_R32G32B32_UINT || - image->vk.format == VK_FORMAT_R32G32B32_SINT || + pLayout->offset = + ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level, &plane->surface, 0, layer) + + level_offset; + if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT || image->vk.format == VK_FORMAT_R32G32B32_SFLOAT) { /* Adjust the number of bytes between each row because * the pitch is actually the number of components per @@ -2654,8 +2541,7 @@ radv_GetImageSubresourceLayout(VkDevice _device, VkImage _image, */ pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3; } else { - uint32_t pitch = - surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch; + uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch; assert(util_is_power_of_two_nonzero(surface->bpe)); pLayout->rowPitch = pitch * surface->bpe; @@ -2696,8 +2582,7 @@ radv_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo, RADV_FROM_HANDLE(radv_device, device, _device); struct radv_image_view *view; - view = - vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (view == NULL) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -2734,8 +2619,7 @@ radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device, view->bo = buffer->bo; view->range = vk_buffer_range(&buffer->vk, pCreateInfo->offset, pCreateInfo->range); - radv_make_texel_buffer_descriptor(device, va, pCreateInfo->format, pCreateInfo->offset, - view->range, view->state); + radv_make_texel_buffer_descriptor(device, va, pCreateInfo->format, pCreateInfo->offset, view->range, view->state); } void @@ -2751,8 +2635,7 @@ radv_CreateBufferView(VkDevice _device, const VkBufferViewCreateInfo *pCreateInf RADV_FROM_HANDLE(radv_device, device, _device); struct radv_buffer_view *view; - view = - vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!view) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -2764,8 +2647,7 @@ radv_CreateBufferView(VkDevice _device, const VkBufferViewCreateInfo *pCreateInf } VKAPI_ATTR void VKAPI_CALL -radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView, - const VkAllocationCallbacks *pAllocator) +radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView, const VkAllocationCallbacks *pAllocator) { RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_buffer_view, view, bufferView); diff --git a/src/amd/vulkan/radv_instance.c b/src/amd/vulkan/radv_instance.c index 7be8b5a..dbd0601 100644 --- a/src/amd/vulkan/radv_instance.c +++ b/src/amd/vulkan/radv_instance.c @@ -33,52 +33,51 @@ #include "vk_instance.h" #include "vk_util.h" -static const struct debug_control radv_debug_options[] = { - {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS}, - {"nodcc", RADV_DEBUG_NO_DCC}, - {"shaders", RADV_DEBUG_DUMP_SHADERS}, - {"nocache", RADV_DEBUG_NO_CACHE}, - {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS}, - {"nohiz", RADV_DEBUG_NO_HIZ}, - {"nocompute", 
RADV_DEBUG_NO_COMPUTE_QUEUE}, - {"allbos", RADV_DEBUG_ALL_BOS}, - {"noibs", RADV_DEBUG_NO_IBS}, - {"spirv", RADV_DEBUG_DUMP_SPIRV}, - {"vmfaults", RADV_DEBUG_VM_FAULTS}, - {"zerovram", RADV_DEBUG_ZERO_VRAM}, - {"syncshaders", RADV_DEBUG_SYNC_SHADERS}, - {"preoptir", RADV_DEBUG_PREOPTIR}, - {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS}, - {"info", RADV_DEBUG_INFO}, - {"startup", RADV_DEBUG_STARTUP}, - {"checkir", RADV_DEBUG_CHECKIR}, - {"nobinning", RADV_DEBUG_NOBINNING}, - {"nongg", RADV_DEBUG_NO_NGG}, - {"metashaders", RADV_DEBUG_DUMP_META_SHADERS}, - {"nomemorycache", RADV_DEBUG_NO_MEMORY_CACHE}, - {"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE}, - {"llvm", RADV_DEBUG_LLVM}, - {"forcecompress", RADV_DEBUG_FORCE_COMPRESS}, - {"hang", RADV_DEBUG_HANG}, - {"img", RADV_DEBUG_IMG}, - {"noumr", RADV_DEBUG_NO_UMR}, - {"invariantgeom", RADV_DEBUG_INVARIANT_GEOM}, - {"splitfma", RADV_DEBUG_SPLIT_FMA}, - {"nodisplaydcc", RADV_DEBUG_NO_DISPLAY_DCC}, - {"notccompatcmask", RADV_DEBUG_NO_TC_COMPAT_CMASK}, - {"novrsflatshading", RADV_DEBUG_NO_VRS_FLAT_SHADING}, - {"noatocdithering", RADV_DEBUG_NO_ATOC_DITHERING}, - {"nonggc", RADV_DEBUG_NO_NGGC}, - {"prologs", RADV_DEBUG_DUMP_PROLOGS}, - {"nodma", RADV_DEBUG_NO_DMA_BLIT}, - {"epilogs", RADV_DEBUG_DUMP_EPILOGS}, - {"nofmask", RADV_DEBUG_NO_FMASK}, - {"shadowregs", RADV_DEBUG_SHADOW_REGS}, - {"extra_md", RADV_DEBUG_EXTRA_MD}, - {"nogpl", RADV_DEBUG_NO_GPL}, - {"videoarraypath", RADV_DEBUG_VIDEO_ARRAY_PATH}, - {"nort", RADV_DEBUG_NO_RT}, - {NULL, 0}}; +static const struct debug_control radv_debug_options[] = {{"nofastclears", RADV_DEBUG_NO_FAST_CLEARS}, + {"nodcc", RADV_DEBUG_NO_DCC}, + {"shaders", RADV_DEBUG_DUMP_SHADERS}, + {"nocache", RADV_DEBUG_NO_CACHE}, + {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS}, + {"nohiz", RADV_DEBUG_NO_HIZ}, + {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE}, + {"allbos", RADV_DEBUG_ALL_BOS}, + {"noibs", RADV_DEBUG_NO_IBS}, + {"spirv", RADV_DEBUG_DUMP_SPIRV}, + {"vmfaults", RADV_DEBUG_VM_FAULTS}, + {"zerovram", RADV_DEBUG_ZERO_VRAM}, + {"syncshaders", RADV_DEBUG_SYNC_SHADERS}, + {"preoptir", RADV_DEBUG_PREOPTIR}, + {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS}, + {"info", RADV_DEBUG_INFO}, + {"startup", RADV_DEBUG_STARTUP}, + {"checkir", RADV_DEBUG_CHECKIR}, + {"nobinning", RADV_DEBUG_NOBINNING}, + {"nongg", RADV_DEBUG_NO_NGG}, + {"metashaders", RADV_DEBUG_DUMP_META_SHADERS}, + {"nomemorycache", RADV_DEBUG_NO_MEMORY_CACHE}, + {"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE}, + {"llvm", RADV_DEBUG_LLVM}, + {"forcecompress", RADV_DEBUG_FORCE_COMPRESS}, + {"hang", RADV_DEBUG_HANG}, + {"img", RADV_DEBUG_IMG}, + {"noumr", RADV_DEBUG_NO_UMR}, + {"invariantgeom", RADV_DEBUG_INVARIANT_GEOM}, + {"splitfma", RADV_DEBUG_SPLIT_FMA}, + {"nodisplaydcc", RADV_DEBUG_NO_DISPLAY_DCC}, + {"notccompatcmask", RADV_DEBUG_NO_TC_COMPAT_CMASK}, + {"novrsflatshading", RADV_DEBUG_NO_VRS_FLAT_SHADING}, + {"noatocdithering", RADV_DEBUG_NO_ATOC_DITHERING}, + {"nonggc", RADV_DEBUG_NO_NGGC}, + {"prologs", RADV_DEBUG_DUMP_PROLOGS}, + {"nodma", RADV_DEBUG_NO_DMA_BLIT}, + {"epilogs", RADV_DEBUG_DUMP_EPILOGS}, + {"nofmask", RADV_DEBUG_NO_FMASK}, + {"shadowregs", RADV_DEBUG_SHADOW_REGS}, + {"extra_md", RADV_DEBUG_EXTRA_MD}, + {"nogpl", RADV_DEBUG_NO_GPL}, + {"videoarraypath", RADV_DEBUG_VIDEO_ARRAY_PATH}, + {"nort", RADV_DEBUG_NO_RT}, + {NULL, 0}}; const char * radv_get_debug_option_name(int id) @@ -87,23 +86,22 @@ radv_get_debug_option_name(int id) return radv_debug_options[id].string; } -static const struct debug_control radv_perftest_options[] = { - 
{"localbos", RADV_PERFTEST_LOCAL_BOS}, - {"dccmsaa", RADV_PERFTEST_DCC_MSAA}, - {"bolist", RADV_PERFTEST_BO_LIST}, - {"cswave32", RADV_PERFTEST_CS_WAVE_32}, - {"pswave32", RADV_PERFTEST_PS_WAVE_32}, - {"gewave32", RADV_PERFTEST_GE_WAVE_32}, - {"nosam", RADV_PERFTEST_NO_SAM}, - {"sam", RADV_PERFTEST_SAM}, - {"rt", RADV_PERFTEST_RT}, - {"nggc", RADV_PERFTEST_NGGC}, - {"emulate_rt", RADV_PERFTEST_EMULATE_RT}, - {"rtwave64", RADV_PERFTEST_RT_WAVE_64}, - {"ngg_streamout", RADV_PERFTEST_NGG_STREAMOUT}, - {"video_decode", RADV_PERFTEST_VIDEO_DECODE}, - {"dmashaders", RADV_PERFTEST_DMA_SHADERS}, - {NULL, 0}}; +static const struct debug_control radv_perftest_options[] = {{"localbos", RADV_PERFTEST_LOCAL_BOS}, + {"dccmsaa", RADV_PERFTEST_DCC_MSAA}, + {"bolist", RADV_PERFTEST_BO_LIST}, + {"cswave32", RADV_PERFTEST_CS_WAVE_32}, + {"pswave32", RADV_PERFTEST_PS_WAVE_32}, + {"gewave32", RADV_PERFTEST_GE_WAVE_32}, + {"nosam", RADV_PERFTEST_NO_SAM}, + {"sam", RADV_PERFTEST_SAM}, + {"rt", RADV_PERFTEST_RT}, + {"nggc", RADV_PERFTEST_NGGC}, + {"emulate_rt", RADV_PERFTEST_EMULATE_RT}, + {"rtwave64", RADV_PERFTEST_RT_WAVE_64}, + {"ngg_streamout", RADV_PERFTEST_NGG_STREAMOUT}, + {"video_decode", RADV_PERFTEST_VIDEO_DECODE}, + {"dmashaders", RADV_PERFTEST_DMA_SHADERS}, + {NULL, 0}}; const char * radv_get_perftest_option_name(int id) @@ -156,20 +154,16 @@ static const driOptionDescription radv_dri_options[] = { static void radv_init_dri_options(struct radv_instance *instance) { - driParseOptionInfo(&instance->available_dri_options, radv_dri_options, - ARRAY_SIZE(radv_dri_options)); - driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "radv", NULL, - NULL, instance->vk.app_info.app_name, instance->vk.app_info.app_version, + driParseOptionInfo(&instance->available_dri_options, radv_dri_options, ARRAY_SIZE(radv_dri_options)); + driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "radv", NULL, NULL, + instance->vk.app_info.app_name, instance->vk.app_info.app_version, instance->vk.app_info.engine_name, instance->vk.app_info.engine_version); - instance->enable_mrt_output_nan_fixup = - driQueryOptionb(&instance->dri_options, "radv_enable_mrt_output_nan_fixup"); + instance->enable_mrt_output_nan_fixup = driQueryOptionb(&instance->dri_options, "radv_enable_mrt_output_nan_fixup"); - instance->disable_shrink_image_store = - driQueryOptionb(&instance->dri_options, "radv_disable_shrink_image_store"); + instance->disable_shrink_image_store = driQueryOptionb(&instance->dri_options, "radv_disable_shrink_image_store"); - instance->absolute_depth_bias = - driQueryOptionb(&instance->dri_options, "radv_absolute_depth_bias"); + instance->absolute_depth_bias = driQueryOptionb(&instance->dri_options, "radv_absolute_depth_bias"); instance->disable_tc_compat_htile_in_general = driQueryOptionb(&instance->dri_options, "radv_disable_tc_compat_htile_general"); @@ -191,17 +185,14 @@ radv_init_dri_options(struct radv_instance *instance) instance->zero_vram = driQueryOptionb(&instance->dri_options, "radv_zero_vram"); - instance->disable_aniso_single_level = - driQueryOptionb(&instance->dri_options, "radv_disable_aniso_single_level"); + instance->disable_aniso_single_level = driQueryOptionb(&instance->dri_options, "radv_disable_aniso_single_level"); instance->disable_sinking_load_input_fs = driQueryOptionb(&instance->dri_options, "radv_disable_sinking_load_input_fs"); - instance->flush_before_query_copy = - driQueryOptionb(&instance->dri_options, "radv_flush_before_query_copy"); + 
instance->flush_before_query_copy = driQueryOptionb(&instance->dri_options, "radv_flush_before_query_copy"); - instance->enable_unified_heap_on_apu = - driQueryOptionb(&instance->dri_options, "radv_enable_unified_heap_on_apu"); + instance->enable_unified_heap_on_apu = driQueryOptionb(&instance->dri_options, "radv_enable_unified_heap_on_apu"); instance->tex_non_uniform = driQueryOptionb(&instance->dri_options, "radv_tex_non_uniform"); @@ -251,8 +242,8 @@ static const struct vk_instance_extension_table radv_instance_extensions_support }; VKAPI_ATTR VkResult VKAPI_CALL -radv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, VkInstance *pInstance) +radv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, + VkInstance *pInstance) { struct radv_instance *instance; VkResult result; @@ -268,8 +259,8 @@ radv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, vk_instance_dispatch_table_from_entrypoints(&dispatch_table, &radv_instance_entrypoints, true); vk_instance_dispatch_table_from_entrypoints(&dispatch_table, &wsi_instance_entrypoints, false); - result = vk_instance_init(&instance->vk, &radv_instance_extensions_supported, &dispatch_table, - pCreateInfo, pAllocator); + result = + vk_instance_init(&instance->vk, &radv_instance_extensions_supported, &dispatch_table, pCreateInfo, pAllocator); if (result != VK_SUCCESS) { vk_free(pAllocator, instance); return vk_error(NULL, result); @@ -325,8 +316,7 @@ radv_EnumerateInstanceExtensionProperties(const char *pLayerName, uint32_t *pPro if (pLayerName) return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); - return vk_enumerate_instance_extension_properties(&radv_instance_extensions_supported, - pPropertyCount, pProperties); + return vk_enumerate_instance_extension_properties(&radv_instance_extensions_supported, pPropertyCount, pProperties); } VKAPI_ATTR VkResult VKAPI_CALL diff --git a/src/amd/vulkan/radv_llvm_helper.cpp b/src/amd/vulkan/radv_llvm_helper.cpp index 0fedf5f..9d13860 100644 --- a/src/amd/vulkan/radv_llvm_helper.cpp +++ b/src/amd/vulkan/radv_llvm_helper.cpp @@ -26,8 +26,8 @@ #include class radv_llvm_per_thread_info { public: - radv_llvm_per_thread_info(enum radeon_family arg_family, - enum ac_target_machine_options arg_tm_options, unsigned arg_wave_size) + radv_llvm_per_thread_info(enum radeon_family arg_family, enum ac_target_machine_options arg_tm_options, + unsigned arg_wave_size) : family(arg_family), tm_options(arg_tm_options), wave_size(arg_wave_size), passes(NULL) { } @@ -54,8 +54,7 @@ class radv_llvm_per_thread_info { return ac_compile_module_to_elf(passes, module, pelf_buffer, pelf_size); } - bool is_same(enum radeon_family arg_family, enum ac_target_machine_options arg_tm_options, - unsigned arg_wave_size) + bool is_same(enum radeon_family arg_family, enum ac_target_machine_options arg_tm_options, unsigned arg_wave_size) { if (arg_family == family && arg_tm_options == tm_options && arg_wave_size == wave_size) return true; @@ -74,8 +73,7 @@ class radv_llvm_per_thread_info { static thread_local std::list radv_llvm_per_thread_list; bool -radv_compile_to_elf(struct ac_llvm_compiler *info, LLVMModuleRef module, char **pelf_buffer, - size_t *pelf_size) +radv_compile_to_elf(struct ac_llvm_compiler *info, LLVMModuleRef module, char **pelf_buffer, size_t *pelf_size) { radv_llvm_per_thread_info *thread_info = nullptr; diff --git a/src/amd/vulkan/radv_llvm_helper.h b/src/amd/vulkan/radv_llvm_helper.h index e57d72d..489ae69 100644 --- 
a/src/amd/vulkan/radv_llvm_helper.h +++ b/src/amd/vulkan/radv_llvm_helper.h @@ -32,8 +32,7 @@ extern "C" { bool radv_init_llvm_compiler(struct ac_llvm_compiler *info, enum radeon_family family, enum ac_target_machine_options tm_options, unsigned wave_size); -bool radv_compile_to_elf(struct ac_llvm_compiler *info, LLVMModuleRef module, char **pelf_buffer, - size_t *pelf_size); +bool radv_compile_to_elf(struct ac_llvm_compiler *info, LLVMModuleRef module, char **pelf_buffer, size_t *pelf_size); #ifdef __cplusplus } diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 4ecf8c9..a45f297 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -33,8 +33,8 @@ #include "radv_shader_args.h" #include "ac_binary.h" -#include "ac_nir.h" #include "ac_llvm_build.h" +#include "ac_nir.h" #include "ac_nir_to_llvm.h" #include "ac_shader_abi.h" #include "ac_shader_util.h" @@ -96,8 +96,7 @@ load_descriptor_sets(struct radv_shader_context *ctx) while (mask) { int i = u_bit_scan(&mask); - ctx->descriptor_sets[i] = - ac_build_load_to_sgpr(&ctx->ac, desc_sets, LLVMConstInt(ctx->ac.i32, i, false)); + ctx->descriptor_sets[i] = ac_build_load_to_sgpr(&ctx->ac, desc_sets, LLVMConstInt(ctx->ac.i32, i, false)); LLVMSetAlignment(ctx->descriptor_sets[i], 4); } } else { @@ -152,15 +151,13 @@ create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has } } - ctx->main_function = - create_llvm_function(&ctx->ac, ctx->ac.module, ctx->ac.builder, &ctx->args->ac, - get_llvm_calling_convention(ctx->main_function.value, stage), - ctx->max_workgroup_size, ctx->options); + ctx->main_function = create_llvm_function(&ctx->ac, ctx->ac.module, ctx->ac.builder, &ctx->args->ac, + get_llvm_calling_convention(ctx->main_function.value, stage), + ctx->max_workgroup_size, ctx->options); load_descriptor_sets(ctx); - if (stage == MESA_SHADER_TESS_CTRL || - (stage == MESA_SHADER_VERTEX && ctx->shader_info->vs.as_ls) || + if (stage == MESA_SHADER_TESS_CTRL || (stage == MESA_SHADER_VERTEX && ctx->shader_info->vs.as_ls) || ctx->shader_info->is_ngg || /* GFX9 has the ESGS ring buffer in LDS. */ (stage == MESA_SHADER_GEOMETRY && has_previous_stage)) { @@ -209,8 +206,7 @@ radv_load_ssbo(struct ac_shader_abi *abi, LLVMValueRef buffer_ptr, bool write, b } static LLVMValueRef -radv_get_sampler_desc(struct ac_shader_abi *abi, LLVMValueRef index, - enum ac_descriptor_type desc_type) +radv_get_sampler_desc(struct ac_shader_abi *abi, LLVMValueRef index, enum ac_descriptor_type desc_type) { struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); @@ -218,8 +214,7 @@ radv_get_sampler_desc(struct ac_shader_abi *abi, LLVMValueRef index, * use the tail from plane 1 so that we can store only the first 16 bytes * of the last plane. 
*/ if (desc_type == AC_DESC_PLANE_2 && index && LLVMTypeOf(index) == ctx->ac.i32) { - LLVMValueRef plane1_addr = - LLVMBuildSub(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 32, false), ""); + LLVMValueRef plane1_addr = LLVMBuildSub(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 32, false), ""); LLVMValueRef descriptor1 = radv_load_rsrc(ctx, plane1_addr, ctx->ac.v8i32); LLVMValueRef descriptor2 = radv_load_rsrc(ctx, index, ctx->ac.v4i32); @@ -237,8 +232,8 @@ radv_get_sampler_desc(struct ac_shader_abi *abi, LLVMValueRef index, } static void -scan_shader_output_decl(struct radv_shader_context *ctx, struct nir_variable *variable, - struct nir_shader *shader, gl_shader_stage stage) +scan_shader_output_decl(struct radv_shader_context *ctx, struct nir_variable *variable, struct nir_shader *shader, + gl_shader_stage stage) { int idx = variable->data.driver_location; unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); @@ -276,8 +271,7 @@ static void prepare_gs_input_vgprs(struct radv_shader_context *ctx, bool merged) { if (merged) { - ctx->gs_wave_id = - ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 16, 8); + ctx->gs_wave_id = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 16, 8); } else { ctx->gs_wave_id = ac_get_arg(&ctx->ac, ctx->args->ac.gs_wave_id); } @@ -293,13 +287,14 @@ declare_esgs_ring(struct radv_shader_context *ctx) { assert(!LLVMGetNamedGlobal(ctx->ac.module, "esgs_ring")); - LLVMValueRef esgs_ring = LLVMAddGlobalInAddressSpace(ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0), - "esgs_ring", AC_ADDR_SPACE_LDS); + LLVMValueRef esgs_ring = + LLVMAddGlobalInAddressSpace(ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0), "esgs_ring", AC_ADDR_SPACE_LDS); LLVMSetLinkage(esgs_ring, LLVMExternalLinkage); LLVMSetAlignment(esgs_ring, 64 * 1024); } -static LLVMValueRef radv_intrinsic_load(struct ac_shader_abi *abi, nir_intrinsic_instr *intrin) +static LLVMValueRef +radv_intrinsic_load(struct ac_shader_abi *abi, nir_intrinsic_instr *intrin) { switch (intrin->intrinsic) { case nir_intrinsic_load_base_vertex: @@ -311,10 +306,8 @@ static LLVMValueRef radv_intrinsic_load(struct ac_shader_abi *abi, nir_intrinsic } static LLVMModuleRef -ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, - const struct radv_nir_compiler_options *options, - const struct radv_shader_info *info, - struct nir_shader *const *shaders, int shader_count, +ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, const struct radv_nir_compiler_options *options, + const struct radv_shader_info *info, struct nir_shader *const *shaders, int shader_count, const struct radv_shader_args *args) { struct radv_shader_context ctx = {0}; @@ -335,8 +328,8 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, exports_color_null = !exports_mrtz || (shaders[0]->info.outputs_written & (0xffu << FRAG_RESULT_DATA0)); } - ac_llvm_context_init(&ctx.ac, ac_llvm, options->info, - float_mode, info->wave_size, info->ballot_bit_size, exports_color_null, exports_mrtz); + ac_llvm_context_init(&ctx.ac, ac_llvm, options->info, float_mode, info->wave_size, info->ballot_bit_size, + exports_color_null, exports_mrtz); uint32_t length = 1; for (uint32_t i = 0; i < shader_count; i++) @@ -384,8 +377,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, if (args->ac.instance_id.used) ctx.abi.instance_id = ac_get_arg(&ctx.ac, args->ac.instance_id); - if (options->info->has_ls_vgpr_init_bug && - shaders[shader_count - 1]->info.stage == 
MESA_SHADER_TESS_CTRL) + if (options->info->has_ls_vgpr_init_bug && shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL) ac_fixup_ls_hs_input_vgprs(&ctx.ac, &ctx.abi, &args->ac); if (is_ngg) { @@ -441,8 +433,8 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ac_build_s_barrier(&ctx.ac, shaders[shader_idx]->info.stage); } - nir_foreach_shader_out_variable(variable, shaders[shader_idx]) scan_shader_output_decl( - &ctx, variable, shaders[shader_idx], shaders[shader_idx]->info.stage); + nir_foreach_shader_out_variable (variable, shaders[shader_idx]) + scan_shader_output_decl(&ctx, variable, shaders[shader_idx], shaders[shader_idx]->info.stage); bool check_merged_wave_info = shader_count >= 2 && !(is_ngg && shader_idx == 1); LLVMBasicBlockRef merge_block = NULL; @@ -452,8 +444,8 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, ""); merge_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, ""); - LLVMValueRef count = ac_unpack_param( - &ctx.ac, ac_get_arg(&ctx.ac, args->ac.merged_wave_info), 8 * shader_idx, 8); + LLVMValueRef count = + ac_unpack_param(&ctx.ac, ac_get_arg(&ctx.ac, args->ac.merged_wave_info), 8 * shader_idx, 8); LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac); LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT, thread_id, count, ""); LLVMBuildCondBr(ctx.ac.builder, cond, then_block, merge_block); @@ -477,8 +469,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, LLVMBuildRetVoid(ctx.ac.builder); if (options->dump_preoptir) { - fprintf(stderr, "%s LLVM IR:\n\n", - radv_get_shader_name(info, shaders[shader_count - 1]->info.stage)); + fprintf(stderr, "%s LLVM IR:\n\n", radv_get_shader_name(info, shaders[shader_count - 1]->info.stage)); ac_dump_module(ctx.ac.module); fprintf(stderr, "\n"); } @@ -506,8 +497,7 @@ ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context) } static unsigned -radv_llvm_compile(LLVMModuleRef M, char **pelf_buffer, size_t *pelf_size, - struct ac_llvm_compiler *ac_llvm) +radv_llvm_compile(LLVMModuleRef M, char **pelf_buffer, size_t *pelf_size, struct ac_llvm_compiler *ac_llvm) { unsigned retval = 0; LLVMContextRef llvm_ctx; @@ -524,9 +514,8 @@ radv_llvm_compile(LLVMModuleRef M, char **pelf_buffer, size_t *pelf_size, } static void -ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, LLVMModuleRef llvm_module, - struct radv_shader_binary **rbinary, const char *name, - const struct radv_nir_compiler_options *options) +ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, LLVMModuleRef llvm_module, struct radv_shader_binary **rbinary, + const char *name, const struct radv_nir_compiler_options *options) { char *elf_buffer = NULL; size_t elf_size = 0; @@ -571,26 +560,22 @@ ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, LLVMModuleRef llvm_modu } static void -radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, - const struct radv_nir_compiler_options *options, - const struct radv_shader_info *info, - struct radv_shader_binary **rbinary, - const struct radv_shader_args *args, struct nir_shader *const *nir, - int nir_count) +radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, const struct radv_nir_compiler_options *options, + const struct radv_shader_info *info, struct radv_shader_binary **rbinary, + const struct radv_shader_args *args, struct nir_shader *const *nir, int nir_count) { LLVMModuleRef llvm_module; llvm_module = ac_translate_nir_to_llvm(ac_llvm, options, info, nir, nir_count, 
args); - ac_compile_llvm_module(ac_llvm, llvm_module, rbinary, - radv_get_shader_name(info, nir[nir_count - 1]->info.stage), options); + ac_compile_llvm_module(ac_llvm, llvm_module, rbinary, radv_get_shader_name(info, nir[nir_count - 1]->info.stage), + options); } void -llvm_compile_shader(const struct radv_nir_compiler_options *options, - const struct radv_shader_info *info, unsigned shader_count, - struct nir_shader *const *shaders, struct radv_shader_binary **binary, +llvm_compile_shader(const struct radv_nir_compiler_options *options, const struct radv_shader_info *info, + unsigned shader_count, struct nir_shader *const *shaders, struct radv_shader_binary **binary, const struct radv_shader_args *args) { enum ac_target_machine_options tm_options = 0; diff --git a/src/amd/vulkan/radv_perfcounter.c b/src/amd/vulkan/radv_perfcounter.c index 64f55a7..f7d7c34 100644 --- a/src/amd/vulkan/radv_perfcounter.c +++ b/src/amd/vulkan/radv_perfcounter.c @@ -38,13 +38,11 @@ radv_perfcounter_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders) } static void -radv_emit_windowed_counters(struct radv_device *device, struct radeon_cmdbuf *cs, int family, - bool enable) +radv_emit_windowed_counters(struct radv_device *device, struct radeon_cmdbuf *cs, int family, bool enable) { if (family == RADV_QUEUE_GENERAL) { radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); - radeon_emit(cs, EVENT_TYPE(enable ? V_028A90_PERFCOUNTER_START : V_028A90_PERFCOUNTER_STOP) | - EVENT_INDEX(0)); + radeon_emit(cs, EVENT_TYPE(enable ? V_028A90_PERFCOUNTER_START : V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0)); } radeon_set_sh_reg(cs, R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(enable)); @@ -54,8 +52,8 @@ void radv_perfcounter_emit_spm_reset(struct radeon_cmdbuf *cs) { radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, - S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) | - S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_DISABLE_AND_RESET)); + S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) | + S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_DISABLE_AND_RESET)); } void @@ -63,8 +61,8 @@ radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf { /* Start SPM counters. */ radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, - S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) | - S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_START_COUNTING)); + S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) | + S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_START_COUNTING)); radv_emit_windowed_counters(device, cs, family, true); } @@ -76,10 +74,10 @@ radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf /* Stop SPM counters. */ radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, - S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) | - S_036020_SPM_PERFMON_STATE(device->physical_device->rad_info.never_stop_sq_perf_counters ? - V_036020_STRM_PERFMON_STATE_START_COUNTING : - V_036020_STRM_PERFMON_STATE_STOP_COUNTING)); + S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) | + S_036020_SPM_PERFMON_STATE(device->physical_device->rad_info.never_stop_sq_perf_counters + ? 
V_036020_STRM_PERFMON_STATE_START_COUNTING + : V_036020_STRM_PERFMON_STATE_STOP_COUNTING)); } enum radv_perfcounter_op { @@ -144,20 +142,19 @@ struct radv_perfcounter_desc { enum radv_perfcounter_uuid uuid; }; -#define PC_DESC(arg_op, arg_unit, arg_name, arg_category, arg_description, arg_uuid, ...) \ - (struct radv_perfcounter_desc) \ - { \ - .impl = {.op = arg_op, .regs = {__VA_ARGS__}}, \ - .unit = VK_PERFORMANCE_COUNTER_UNIT_##arg_unit##_KHR, .name = arg_name, \ - .category = arg_category, .description = arg_description, .uuid = RADV_PC_UUID_##arg_uuid \ +#define PC_DESC(arg_op, arg_unit, arg_name, arg_category, arg_description, arg_uuid, ...) \ + (struct radv_perfcounter_desc) \ + { \ + .impl = {.op = arg_op, .regs = {__VA_ARGS__}}, .unit = VK_PERFORMANCE_COUNTER_UNIT_##arg_unit##_KHR, \ + .name = arg_name, .category = arg_category, .description = arg_description, .uuid = RADV_PC_UUID_##arg_uuid \ } -#define ADD_PC(op, unit, name, category, description, uuid, ...) \ - do { \ - if (descs) { \ - descs[*count] = PC_DESC((op), unit, name, category, description, uuid, __VA_ARGS__); \ - } \ - ++*count; \ +#define ADD_PC(op, unit, name, category, description, uuid, ...) \ + do { \ + if (descs) { \ + descs[*count] = PC_DESC((op), unit, name, category, description, uuid, __VA_ARGS__); \ + } \ + ++*count; \ } while (0) #define CTR(block, ctr) (S_REG_BLOCK(block) | S_REG_SEL(ctr)) #define CONSTANT(v) (S_REG_CONSTANT(1) | (uint32_t)(v)) @@ -209,87 +206,70 @@ enum { TCP_PERF_SEL_REQ_MISS_GFX10 = CTR(TCP, 0x12), }; -#define CTR_NUM_SIMD \ - CONSTANT(pdev->rad_info.num_simd_per_compute_unit * pdev->rad_info.num_cu) -#define CTR_NUM_CUS CONSTANT(pdev->rad_info.num_cu) +#define CTR_NUM_SIMD CONSTANT(pdev->rad_info.num_simd_per_compute_unit * pdev->rad_info.num_cu) +#define CTR_NUM_CUS CONSTANT(pdev->rad_info.num_cu) static void -radv_query_perfcounter_descs(struct radv_physical_device *pdev, uint32_t *count, - struct radv_perfcounter_desc *descs) +radv_query_perfcounter_descs(struct radv_physical_device *pdev, uint32_t *count, struct radv_perfcounter_desc *descs) { *count = 0; - ADD_PC(RADV_PC_OP_MAX, CYCLES, "GPU active cycles", "GRBM", - "cycles the GPU is active processing a command buffer.", GPU_CYCLES, - GRBM_PERF_SEL_GUI_ACTIVE); - - ADD_PC(RADV_PC_OP_SUM, GENERIC, "Waves", "Shaders", "Number of waves executed", SHADER_WAVES, - SQ_PERF_SEL_WAVES); - ADD_PC(RADV_PC_OP_SUM, GENERIC, "Instructions", "Shaders", "Number of Instructions executed", - SHADER_INSTRUCTIONS, SQ_PERF_SEL_INSTS_ALL_GFX10); - ADD_PC(RADV_PC_OP_SUM, GENERIC, "VALU Instructions", "Shaders", - "Number of VALU Instructions executed", SHADER_INSTRUCTIONS_VALU, - SQ_PERF_SEL_INSTS_VALU_GFX10); - ADD_PC(RADV_PC_OP_SUM, GENERIC, "SALU Instructions", "Shaders", - "Number of SALU Instructions executed", SHADER_INSTRUCTIONS_SALU, - SQ_PERF_SEL_INSTS_SALU_GFX10); - ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Load Instructions", "Shaders", - "Number of VMEM load instructions executed", SHADER_INSTRUCTIONS_VMEM_LOAD, - SQ_PERF_SEL_INSTS_TEX_LOAD_GFX10); - ADD_PC(RADV_PC_OP_SUM, GENERIC, "SMEM Load Instructions", "Shaders", - "Number of SMEM load instructions executed", SHADER_INSTRUCTIONS_SMEM_LOAD, - SQ_PERF_SEL_INSTS_SMEM_GFX10); - ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Store Instructions", "Shaders", - "Number of VMEM store instructions executed", SHADER_INSTRUCTIONS_VMEM_STORE, - SQ_PERF_SEL_INSTS_TEX_STORE_GFX10); - ADD_PC(RADV_PC_OP_SUM, GENERIC, "LDS Instructions", "Shaders", - "Number of LDS Instructions executed", 
SHADER_INSTRUCTIONS_LDS, - SQ_PERF_SEL_INSTS_LDS_GFX10); - ADD_PC(RADV_PC_OP_SUM, GENERIC, "GDS Instructions", "Shaders", - "Number of GDS Instructions executed", SHADER_INSTRUCTIONS_GDS, - SQ_PERF_SEL_INSTS_GDS_GFX10); + ADD_PC(RADV_PC_OP_MAX, CYCLES, "GPU active cycles", "GRBM", "cycles the GPU is active processing a command buffer.", + GPU_CYCLES, GRBM_PERF_SEL_GUI_ACTIVE); + + ADD_PC(RADV_PC_OP_SUM, GENERIC, "Waves", "Shaders", "Number of waves executed", SHADER_WAVES, SQ_PERF_SEL_WAVES); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "Instructions", "Shaders", "Number of Instructions executed", SHADER_INSTRUCTIONS, + SQ_PERF_SEL_INSTS_ALL_GFX10); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "VALU Instructions", "Shaders", "Number of VALU Instructions executed", + SHADER_INSTRUCTIONS_VALU, SQ_PERF_SEL_INSTS_VALU_GFX10); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "SALU Instructions", "Shaders", "Number of SALU Instructions executed", + SHADER_INSTRUCTIONS_SALU, SQ_PERF_SEL_INSTS_SALU_GFX10); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Load Instructions", "Shaders", "Number of VMEM load instructions executed", + SHADER_INSTRUCTIONS_VMEM_LOAD, SQ_PERF_SEL_INSTS_TEX_LOAD_GFX10); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "SMEM Load Instructions", "Shaders", "Number of SMEM load instructions executed", + SHADER_INSTRUCTIONS_SMEM_LOAD, SQ_PERF_SEL_INSTS_SMEM_GFX10); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Store Instructions", "Shaders", "Number of VMEM store instructions executed", + SHADER_INSTRUCTIONS_VMEM_STORE, SQ_PERF_SEL_INSTS_TEX_STORE_GFX10); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "LDS Instructions", "Shaders", "Number of LDS Instructions executed", + SHADER_INSTRUCTIONS_LDS, SQ_PERF_SEL_INSTS_LDS_GFX10); + ADD_PC(RADV_PC_OP_SUM, GENERIC, "GDS Instructions", "Shaders", "Number of GDS Instructions executed", + SHADER_INSTRUCTIONS_GDS, SQ_PERF_SEL_INSTS_GDS_GFX10); ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "VALU Busy", "Shader Utilization", - "Percentage of time the VALU units are busy", SHADER_VALU_BUSY, - SQ_PERF_SEL_INST_CYCLES_VALU_GFX10, CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_SIMD); + "Percentage of time the VALU units are busy", SHADER_VALU_BUSY, SQ_PERF_SEL_INST_CYCLES_VALU_GFX10, + CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_SIMD); ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "SALU Busy", "Shader Utilization", - "Percentage of time the SALU units are busy", SHADER_SALU_BUSY, - SQ_PERF_SEL_INSTS_SALU_GFX10, CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_CUS); + "Percentage of time the SALU units are busy", SHADER_SALU_BUSY, SQ_PERF_SEL_INSTS_SALU_GFX10, + CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_CUS); if (pdev->rad_info.gfx_level >= GFX10_3) { - ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM read size", "Memory", - "Number of bytes read from VRAM", VRAM_READ_SIZE, GL2C_PERF_SEL_EA_RDREQ_32B_GFX103, - CONSTANT(32), GL2C_PERF_SEL_EA_RDREQ_64B_GFX103, CONSTANT(64), - GL2C_PERF_SEL_EA_RDREQ_96B_GFX103, CONSTANT(96), GL2C_PERF_SEL_EA_RDREQ_128B_GFX103, + ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM read size", "Memory", "Number of bytes read from VRAM", + VRAM_READ_SIZE, GL2C_PERF_SEL_EA_RDREQ_32B_GFX103, CONSTANT(32), GL2C_PERF_SEL_EA_RDREQ_64B_GFX103, + CONSTANT(64), GL2C_PERF_SEL_EA_RDREQ_96B_GFX103, CONSTANT(96), GL2C_PERF_SEL_EA_RDREQ_128B_GFX103, CONSTANT(128)); - ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM write size", "Memory", - "Number of bytes written to VRAM", VRAM_WRITE_SIZE, GL2C_PERF_SEL_MC_WRREQ_GFX103, - CONSTANT(32), GL2C_PERF_SEL_EA_WRREQ_64B_GFX103, CONSTANT(64), CONSTANT(0), - CONSTANT(0), CONSTANT(0), 
CONSTANT(0)); + ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM write size", "Memory", "Number of bytes written to VRAM", + VRAM_WRITE_SIZE, GL2C_PERF_SEL_MC_WRREQ_GFX103, CONSTANT(32), GL2C_PERF_SEL_EA_WRREQ_64B_GFX103, + CONSTANT(64), CONSTANT(0), CONSTANT(0), CONSTANT(0), CONSTANT(0)); } else { - ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM read size", "Memory", - "Number of bytes read from VRAM", VRAM_READ_SIZE, GL2C_PERF_SEL_EA_RDREQ_32B_GFX101, - CONSTANT(32), GL2C_PERF_SEL_EA_RDREQ_64B_GFX101, CONSTANT(64), - GL2C_PERF_SEL_EA_RDREQ_96B_GFX101, CONSTANT(96), GL2C_PERF_SEL_EA_RDREQ_128B_GFX101, + ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM read size", "Memory", "Number of bytes read from VRAM", + VRAM_READ_SIZE, GL2C_PERF_SEL_EA_RDREQ_32B_GFX101, CONSTANT(32), GL2C_PERF_SEL_EA_RDREQ_64B_GFX101, + CONSTANT(64), GL2C_PERF_SEL_EA_RDREQ_96B_GFX101, CONSTANT(96), GL2C_PERF_SEL_EA_RDREQ_128B_GFX101, CONSTANT(128)); - ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM write size", "Memory", - "Number of bytes written to VRAM", VRAM_WRITE_SIZE, GL2C_PERF_SEL_MC_WRREQ_GFX101, - CONSTANT(32), GL2C_PERF_SEL_EA_WRREQ_64B_GFX101, CONSTANT(32), CONSTANT(0), - CONSTANT(0), CONSTANT(0), CONSTANT(0)); + ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM write size", "Memory", "Number of bytes written to VRAM", + VRAM_WRITE_SIZE, GL2C_PERF_SEL_MC_WRREQ_GFX101, CONSTANT(32), GL2C_PERF_SEL_EA_WRREQ_64B_GFX101, + CONSTANT(32), CONSTANT(0), CONSTANT(0), CONSTANT(0), CONSTANT(0)); } - ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L0 cache hit ratio", "Memory", "Hit ratio of L0 cache", - L0_CACHE_HIT_RATIO, TCP_PERF_SEL_REQ_MISS_GFX10, TCP_PERF_SEL_REQ_GFX10); - ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L1 cache hit ratio", "Memory", "Hit ratio of L1 cache", - L1_CACHE_HIT_RATIO, GL1C_PERF_SEL_REQ_MISS, GL1C_PERF_SEL_REQ); + ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L0 cache hit ratio", "Memory", "Hit ratio of L0 cache", L0_CACHE_HIT_RATIO, + TCP_PERF_SEL_REQ_MISS_GFX10, TCP_PERF_SEL_REQ_GFX10); + ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L1 cache hit ratio", "Memory", "Hit ratio of L1 cache", L1_CACHE_HIT_RATIO, + GL1C_PERF_SEL_REQ_MISS, GL1C_PERF_SEL_REQ); if (pdev->rad_info.gfx_level >= GFX10_3) { - ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L2 cache hit ratio", "Memory", - "Hit ratio of L2 cache", L2_CACHE_HIT_RATIO, GL2C_PERF_SEL_MISS_GFX103, - GL2C_PERF_SEL_REQ); + ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L2 cache hit ratio", "Memory", "Hit ratio of L2 cache", + L2_CACHE_HIT_RATIO, GL2C_PERF_SEL_MISS_GFX103, GL2C_PERF_SEL_REQ); } else { - ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L2 cache hit ratio", "Memory", - "Hit ratio of L2 cache", L2_CACHE_HIT_RATIO, GL2C_PERF_SEL_MISS_GFX101, - GL2C_PERF_SEL_REQ); + ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L2 cache hit ratio", "Memory", "Hit ratio of L2 cache", + L2_CACHE_HIT_RATIO, GL2C_PERF_SEL_MISS_GFX101, GL2C_PERF_SEL_REQ); } } @@ -323,8 +303,8 @@ cmp_uint32_t(const void *a, const void *b) } static VkResult -radv_get_counter_registers(const struct radv_physical_device *pdevice, uint32_t num_indices, - const uint32_t *indices, unsigned *out_num_regs, uint32_t **out_regs) +radv_get_counter_registers(const struct radv_physical_device *pdevice, uint32_t num_indices, const uint32_t *indices, + unsigned *out_num_regs, uint32_t **out_regs) { ASSERTED uint32_t num_counters = pdevice->num_perfcounters; const struct radv_perfcounter_desc *descs = pdevice->perfcounters; @@ -338,8 +318,7 @@ radv_get_counter_registers(const struct radv_physical_device *pdevice, uint32_t for (unsigned 
i = 0; i < num_indices; ++i) { uint32_t index = indices[i]; assert(index < num_counters); - for (unsigned j = 0; j < ARRAY_SIZE(descs[index].impl.regs) && descs[index].impl.regs[j]; - ++j) { + for (unsigned j = 0; j < ARRAY_SIZE(descs[index].impl.regs) && descs[index].impl.regs[j]; ++j) { if (!G_REG_CONSTANT(descs[index].impl.regs[j])) regs[reg_cnt++] = descs[index].impl.regs[j]; } @@ -362,13 +341,11 @@ radv_get_counter_registers(const struct radv_physical_device *pdevice, uint32_t static unsigned radv_pc_get_num_instances(const struct radv_physical_device *pdevice, struct ac_pc_block *ac_block) { - return ac_block->num_instances * - ((ac_block->b->b->flags & AC_PC_BLOCK_SE) ? pdevice->rad_info.max_se : 1); + return ac_block->num_instances * ((ac_block->b->b->flags & AC_PC_BLOCK_SE) ? pdevice->rad_info.max_se : 1); } static unsigned -radv_get_num_counter_passes(const struct radv_physical_device *pdevice, unsigned num_regs, - const uint32_t *regs) +radv_get_num_counter_passes(const struct radv_physical_device *pdevice, unsigned num_regs, const uint32_t *regs) { enum ac_pc_gpu_block prev_block = NUM_GPU_BLOCK; unsigned block_reg_count = 0; @@ -386,8 +363,7 @@ radv_get_num_counter_passes(const struct radv_physical_device *pdevice, unsigned ++block_reg_count; - passes_needed = - MAX2(passes_needed, DIV_ROUND_UP(block_reg_count, ac_block->b->b->num_counters)); + passes_needed = MAX2(passes_needed, DIV_ROUND_UP(block_reg_count, ac_block->b->b->num_counters)); } return passes_needed; @@ -401,8 +377,8 @@ radv_pc_deinit_query_pool(struct radv_pc_query_pool *pool) } VkResult -radv_pc_init_query_pool(struct radv_physical_device *pdevice, - const VkQueryPoolCreateInfo *pCreateInfo, struct radv_pc_query_pool *pool) +radv_pc_init_query_pool(struct radv_physical_device *pdevice, const VkQueryPoolCreateInfo *pCreateInfo, + struct radv_pc_query_pool *pool) { const VkQueryPoolPerformanceCreateInfoKHR *perf_info = vk_find_struct_const(pCreateInfo->pNext, QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR); @@ -411,9 +387,8 @@ radv_pc_init_query_pool(struct radv_physical_device *pdevice, if (!radv_init_perfcounter_descs(pdevice)) return VK_ERROR_OUT_OF_HOST_MEMORY; - result = - radv_get_counter_registers(pdevice, perf_info->counterIndexCount, perf_info->pCounterIndices, - &pool->num_pc_regs, &pool->pc_regs); + result = radv_get_counter_registers(pdevice, perf_info->counterIndexCount, perf_info->pCounterIndices, + &pool->num_pc_regs, &pool->pc_regs); if (result != VK_SUCCESS) return result; @@ -485,8 +460,7 @@ radv_emit_instance(struct radv_cmd_buffer *cmd_buffer, int se, int instance) } static void -radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, - unsigned *selectors) +radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, unsigned *selectors) { struct ac_pc_block_base *regs = block->b->b; struct radeon_cmdbuf *cs = cmd_buffer->cs; @@ -499,8 +473,7 @@ radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, return; for (idx = 0; idx < count; ++idx) { - radeon_set_perfctr_reg(cmd_buffer, regs->select0[idx], - G_REG_SEL(selectors[idx]) | regs->select_or); + radeon_set_perfctr_reg(cmd_buffer, regs->select0[idx], G_REG_SEL(selectors[idx]) | regs->select_or); } for (idx = 0; idx < regs->num_spm_counters; idx++) { @@ -510,8 +483,8 @@ radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, } static void -radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block 
*block, - unsigned count, uint64_t va) +radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, + uint64_t va) { struct ac_pc_block_base *regs = block->b->b; struct radeon_cmdbuf *cs = cmd_buffer->cs; @@ -524,22 +497,20 @@ radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_p reg = regs->counters[idx]; radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_TC_L2) | - COPY_DATA_WR_CONFIRM | COPY_DATA_COUNT_SEL); /* 64 bits */ + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_TC_L2) | COPY_DATA_WR_CONFIRM | + COPY_DATA_COUNT_SEL); /* 64 bits */ radeon_emit(cs, reg >> 2); radeon_emit(cs, 0); /* unused */ radeon_emit(cs, va); radeon_emit(cs, va >> 32); - va += sizeof(uint64_t) * 2 * - radv_pc_get_num_instances(cmd_buffer->device->physical_device, block); + va += sizeof(uint64_t) * 2 * radv_pc_get_num_instances(cmd_buffer->device->physical_device, block); reg += reg_delta; } } static void -radv_pc_sample_block(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, - uint64_t va) +radv_pc_sample_block(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, uint64_t va) { unsigned se_end = 1; if (block->b->b->flags & AC_PC_BLOCK_SE) @@ -576,8 +547,7 @@ radv_pc_wait_idle(struct radv_cmd_buffer *cmd_buffer) } static void -radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, - uint64_t va, bool end) +radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va, bool end) { struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radv_physical_device *pdevice = cmd_buffer->device->physical_device; @@ -590,13 +560,12 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query radv_emit_instance(cmd_buffer, -1, -1); radv_emit_windowed_counters(cmd_buffer->device, cs, cmd_buffer->qf, false); - radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, - S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) | - S_036020_PERFMON_SAMPLE_ENABLE(1)); + radeon_set_uconfig_reg( + cs, R_036020_CP_PERFMON_CNTL, + S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) | S_036020_PERFMON_SAMPLE_ENABLE(1)); for (unsigned pass = 0; pass < pool->num_passes; ++pass) { - uint64_t pred_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + - PERF_CTR_BO_PASS_OFFSET + 8 * pass; + uint64_t pred_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_PASS_OFFSET + 8 * pass; uint64_t reg_va = va + (end ? 
8 : 0); radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0)); @@ -630,8 +599,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query if (end) { uint64_t signal_va = va + pool->b.stride - 8 - 8 * pass; radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, - S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); radeon_emit(cs, signal_va); radeon_emit(cs, signal_va >> 32); radeon_emit(cs, 1); /* value */ @@ -644,8 +612,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query } void -radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, - uint64_t va) +radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va) { struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radv_physical_device *pdevice = cmd_buffer->device->physical_device; @@ -661,8 +628,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pool->b.bo); radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->device->perf_counter_bo); - uint64_t perf_ctr_va = - radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET; + uint64_t perf_ctr_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET; radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); radeon_emit(cs, perf_ctr_va); @@ -679,8 +645,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo radv_perfcounter_emit_shaders(cs, 0x7f); for (unsigned pass = 0; pass < pool->num_passes; ++pass) { - uint64_t pred_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + - PERF_CTR_BO_PASS_OFFSET + 8 * pass; + uint64_t pred_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_PASS_OFFSET + 8 * pass; radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0)); radeon_emit(cs, pred_va); @@ -730,21 +695,18 @@ radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool struct radeon_cmdbuf *cs = cmd_buffer->cs; ASSERTED unsigned cdw_max; - cdw_max = - radeon_check_space(cmd_buffer->device->ws, cs, - 256 + /* Reserved for things that don't scale with passes/counters */ - 5 * pool->num_passes + /* COND_EXECs */ - pool->b.stride / 8 * 8); + cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, + 256 + /* Reserved for things that don't scale with passes/counters */ + 5 * pool->num_passes + /* COND_EXECs */ + pool->b.stride / 8 * 8); radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pool->b.bo); radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->device->perf_counter_bo); - uint64_t perf_ctr_va = - radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET; + uint64_t perf_ctr_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET; si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, - radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, - EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, perf_ctr_va, 1, - cmd_buffer->gfx9_fence_va); + radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, + EOP_DATA_SEL_VALUE_32BIT, perf_ctr_va, 1, cmd_buffer->gfx9_fence_va); radv_cp_wait_mem(cs, 
WAIT_REG_MEM_EQUAL, perf_ctr_va, 1, 0xffffffff); radv_pc_wait_idle(cmd_buffer); @@ -805,8 +767,7 @@ radv_pc_get_result(const struct radv_perfcounter_impl *impl, const uint64_t *dat result.float64 = radv_pc_sum_reg(impl->regs[0], data); break; case RADV_PC_OP_RATIO_DIVSCALE: - result.float64 = radv_pc_sum_reg(impl->regs[0], data) / - (double)radv_pc_sum_reg(impl->regs[1], data) / + result.float64 = radv_pc_sum_reg(impl->regs[0], data) / (double)radv_pc_sum_reg(impl->regs[1], data) / radv_pc_sum_reg(impl->regs[2], data) * 100.0; break; case RADV_PC_OP_REVERSE_RATIO: { @@ -817,8 +778,7 @@ radv_pc_get_result(const struct radv_perfcounter_impl *impl, const uint64_t *dat case RADV_PC_OP_SUM_WEIGHTED_4: result.float64 = 0.0; for (unsigned i = 0; i < 4; ++i) - result.float64 += - radv_pc_sum_reg(impl->regs[2 * i], data) * radv_pc_sum_reg(impl->regs[2 * i + 1], data); + result.float64 += radv_pc_sum_reg(impl->regs[2 * i], data) * radv_pc_sum_reg(impl->regs[2 * i + 1], data); break; default: unreachable("unhandled performance counter operation"); @@ -871,7 +831,7 @@ radv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( pCounters[i].storage = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR; memset(&pCounters[i].uuid, 0, sizeof(pCounters[i].uuid)); - strcpy((char*)&pCounters[i].uuid, "RADV"); + strcpy((char *)&pCounters[i].uuid, "RADV"); const uint32_t uuid = descs[i].uuid; memcpy(&pCounters[i].uuid[12], &uuid, sizeof(uuid)); @@ -879,8 +839,7 @@ radv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( if (pCounterDescriptions) { pCounterDescriptions[i].sType = VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_DESCRIPTION_KHR; - pCounterDescriptions[i].flags = - VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_BIT_KHR; + pCounterDescriptions[i].flags = VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_BIT_KHR; strcpy(pCounterDescriptions[i].name, descs[i].name); strcpy(pCounterDescriptions[i].category, descs[i].category); strcpy(pCounterDescriptions[i].description, descs[i].description); @@ -891,8 +850,8 @@ radv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( VKAPI_ATTR void VKAPI_CALL radv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR( - VkPhysicalDevice physicalDevice, - const VkQueryPoolPerformanceCreateInfoKHR *pPerformanceQueryCreateInfo, uint32_t *pNumPasses) + VkPhysicalDevice physicalDevice, const VkQueryPoolPerformanceCreateInfoKHR *pPerformanceQueryCreateInfo, + uint32_t *pNumPasses) { RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); @@ -908,14 +867,12 @@ radv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR( return; } - assert(vk_queue_to_radv(pdevice, pPerformanceQueryCreateInfo->queueFamilyIndex) == - RADV_QUEUE_GENERAL); + assert(vk_queue_to_radv(pdevice, pPerformanceQueryCreateInfo->queueFamilyIndex) == RADV_QUEUE_GENERAL); unsigned num_regs = 0; uint32_t *regs = NULL; - VkResult result = - radv_get_counter_registers(pdevice, pPerformanceQueryCreateInfo->counterIndexCount, - pPerformanceQueryCreateInfo->pCounterIndices, &num_regs, ®s); + VkResult result = radv_get_counter_registers(pdevice, pPerformanceQueryCreateInfo->counterIndexCount, + pPerformanceQueryCreateInfo->pCounterIndices, &num_regs, ®s); if (result != VK_SUCCESS) { /* Can't return an error, so log */ fprintf(stderr, "radv: Failed to allocate memory for perf counters\n"); diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c index fce403e..56cc402 100644 --- a/src/amd/vulkan/radv_physical_device.c +++ 
b/src/amd/vulkan/radv_physical_device.c @@ -46,8 +46,8 @@ typedef void *drmDevicePtr; #include "drm-uapi/amdgpu_drm.h" #include "winsys/amdgpu/radv_amdgpu_winsys_public.h" #endif -#include "git_sha1.h" #include "winsys/null/radv_null_winsys_public.h" +#include "git_sha1.h" #ifdef LLVM_AVAILABLE #include "ac_llvm_util.h" @@ -71,22 +71,19 @@ static bool radv_taskmesh_enabled(const struct radv_physical_device *pdevice) { return pdevice->use_ngg && !pdevice->use_llvm && pdevice->rad_info.gfx_level >= GFX10_3 && - !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE) && - pdevice->rad_info.has_gang_submit; + !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE) && pdevice->rad_info.has_gang_submit; } static bool radv_vrs_attachment_enabled(const struct radv_physical_device *pdevice) { - return pdevice->rad_info.gfx_level >= GFX11 || - !(pdevice->instance->debug_flags & RADV_DEBUG_NO_HIZ); + return pdevice->rad_info.gfx_level >= GFX11 || !(pdevice->instance->debug_flags & RADV_DEBUG_NO_HIZ); } static bool radv_NV_device_generated_commands_enabled(const struct radv_physical_device *device) { - return device->rad_info.gfx_level >= GFX7 && - !(device->instance->debug_flags & RADV_DEBUG_NO_IBS) && + return device->rad_info.gfx_level >= GFX7 && !(device->instance->debug_flags & RADV_DEBUG_NO_IBS) && driQueryOptionb(&device->instance->dri_options, "radv_dgc"); } @@ -103,8 +100,7 @@ parse_hex(char *out, const char *in, unsigned length) out[i] = 0; for (unsigned i = 0; i < 2 * length; ++i) { - unsigned v = - in[i] <= '9' ? in[i] - '0' : (in[i] >= 'a' ? (in[i] - 'a' + 10) : (in[i] - 'A' + 10)); + unsigned v = in[i] <= '9' ? in[i] - '0' : (in[i] >= 'a' ? (in[i] - 'a' + 10) : (in[i] - 'A' + 10)); out[i / 2] |= v << (4 * (1 - i % 2)); } } @@ -133,8 +129,7 @@ radv_device_get_cache_uuid(struct radv_physical_device *pdevice, void *uuid) #endif #ifdef LLVM_AVAILABLE - if (pdevice->use_llvm && - !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx)) + if (pdevice->use_llvm && !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx)) return -1; #endif @@ -202,8 +197,7 @@ radv_get_adjusted_vram_size(struct radv_physical_device *device) static uint64_t radv_get_visible_vram_size(struct radv_physical_device *device) { - return MIN2(radv_get_adjusted_vram_size(device), - (uint64_t)device->rad_info.vram_vis_size_kb * 1024); + return MIN2(radv_get_adjusted_vram_size(device), (uint64_t)device->rad_info.vram_vis_size_kb * 1024); } static uint64_t @@ -294,8 +288,7 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device) device->memory_domains[type_count] = RADEON_DOMAIN_GTT; device->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS; device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){ - .propertyFlags = - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, .heapIndex = gart_index, }; } @@ -303,8 +296,7 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device) device->memory_domains[type_count] = RADEON_DOMAIN_VRAM; device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS; device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){ - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, 
.heapIndex = visible_vram_index, }; @@ -312,8 +304,7 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device) device->memory_domains[type_count] = RADEON_DOMAIN_VRAM; device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_32BIT; device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){ - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, .heapIndex = visible_vram_index, }; @@ -323,16 +314,16 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device) device->memory_domains[type_count] = RADEON_DOMAIN_GTT; device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS; device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){ - .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT, .heapIndex = gart_index, }; device->memory_domains[type_count] = RADEON_DOMAIN_GTT; device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_32BIT; device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){ - .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT, .heapIndex = gart_index, }; } @@ -342,13 +333,11 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device) for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) { VkMemoryType mem_type = device->memory_properties.memoryTypes[i]; - if (((mem_type.propertyFlags & - (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) || + if (((mem_type.propertyFlags & (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) || mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && !(device->memory_flags[i] & RADEON_FLAG_32BIT)) { - VkMemoryPropertyFlags property_flags = mem_type.propertyFlags | - VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD | + VkMemoryPropertyFlags property_flags = mem_type.propertyFlags | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD; device->memory_domains[type_count] = device->memory_domains[i]; @@ -381,8 +370,7 @@ radv_find_memory_index(const struct radv_physical_device *pdevice, VkMemoryPrope } static void -radv_get_binning_settings(const struct radv_physical_device *pdevice, - struct radv_binning_settings *settings) +radv_get_binning_settings(const struct radv_physical_device *pdevice, struct radv_binning_settings *settings) { settings->context_states_per_bin = 1; settings->persistent_states_per_bin = 1; @@ -473,10 +461,8 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device .KHR_variable_pointers = true, .KHR_video_queue = !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE), .KHR_video_decode_queue = !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE), - .KHR_video_decode_h264 = - VIDEO_CODEC_H264DEC && !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE), - .KHR_video_decode_h265 = - VIDEO_CODEC_H265DEC && !!(device->instance->perftest_flags & 
RADV_PERFTEST_VIDEO_DECODE), + .KHR_video_decode_h264 = VIDEO_CODEC_H264DEC && !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE), + .KHR_video_decode_h265 = VIDEO_CODEC_H265DEC && !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE), .KHR_vulkan_memory_model = true, .KHR_workgroup_memory_explicit_layout = true, .KHR_zero_initialize_workgroup_memory = true, @@ -486,8 +472,7 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device .EXT_border_color_swizzle = device->rad_info.gfx_level >= GFX10, .EXT_buffer_device_address = true, .EXT_calibrated_timestamps = RADV_SUPPORT_CALIBRATED_TIMESTAMPS && - !(device->rad_info.family == CHIP_RAVEN || - device->rad_info.family == CHIP_RAVEN2), + !(device->rad_info.family == CHIP_RAVEN || device->rad_info.family == CHIP_RAVEN2), .EXT_color_write_enable = true, .EXT_conditional_rendering = true, .EXT_conservative_rasterization = device->rad_info.gfx_level >= GFX9, @@ -510,8 +495,7 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device .EXT_external_memory_host = device->rad_info.has_userptr, .EXT_global_priority = true, .EXT_global_priority_query = true, - .EXT_graphics_pipeline_library = - !device->use_llvm && !(device->instance->debug_flags & RADV_DEBUG_NO_GPL), + .EXT_graphics_pipeline_library = !device->use_llvm && !(device->instance->debug_flags & RADV_DEBUG_NO_GPL), .EXT_host_query_reset = true, .EXT_image_2d_view_of_3d = true, .EXT_image_drm_format_modifier = device->rad_info.gfx_level >= GFX9, @@ -563,8 +547,7 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device .EXT_tooling_info = true, .EXT_transform_feedback = true, .EXT_vertex_attribute_divisor = true, - .EXT_vertex_input_dynamic_state = - !device->use_llvm && !radv_NV_device_generated_commands_enabled(device), + .EXT_vertex_input_dynamic_state = !device->use_llvm && !radv_NV_device_generated_commands_enabled(device), .EXT_ycbcr_image_arrays = true, .AMD_buffer_marker = true, .AMD_device_coherent_memory = true, @@ -598,20 +581,17 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device * using it. 
*/ .VALVE_descriptor_set_host_mapping = - device->vk.instance->app_info.engine_name && - strcmp(device->vk.instance->app_info.engine_name, "vkd3d") == 0, + device->vk.instance->app_info.engine_name && strcmp(device->vk.instance->app_info.engine_name, "vkd3d") == 0, .VALVE_mutable_descriptor_type = true, }; } static void -radv_physical_device_get_features(const struct radv_physical_device *pdevice, - struct vk_features *features) +radv_physical_device_get_features(const struct radv_physical_device *pdevice, struct vk_features *features) { bool taskmesh_en = radv_taskmesh_enabled(pdevice); bool has_perf_query = radv_perf_query_supported(pdevice); - bool has_shader_image_float_minmax = pdevice->rad_info.gfx_level != GFX8 && - pdevice->rad_info.gfx_level != GFX9 && + bool has_shader_image_float_minmax = pdevice->rad_info.gfx_level != GFX8 && pdevice->rad_info.gfx_level != GFX9 && pdevice->rad_info.gfx_level != GFX11; *features = (struct vk_features){ @@ -885,10 +865,8 @@ radv_physical_device_get_features(const struct radv_physical_device *pdevice, .shaderBufferFloat16Atomics = false, .shaderBufferFloat16AtomicAdd = false, .shaderBufferFloat16AtomicMinMax = false, - .shaderBufferFloat32AtomicMinMax = - radv_has_shader_buffer_float_minmax(pdevice, 32), - .shaderBufferFloat64AtomicMinMax = - radv_has_shader_buffer_float_minmax(pdevice, 64), + .shaderBufferFloat32AtomicMinMax = radv_has_shader_buffer_float_minmax(pdevice, 32), + .shaderBufferFloat64AtomicMinMax = radv_has_shader_buffer_float_minmax(pdevice, 64), .shaderSharedFloat16Atomics = false, .shaderSharedFloat16AtomicAdd = false, .shaderSharedFloat16AtomicMinMax = false, @@ -1048,12 +1026,10 @@ radv_max_descriptor_set_size() * both. This limit is for the pipeline layout, not for the set layout, but * there is no set limit, so we just set a pipeline limit. I don't think * any app is going to hit this soon. 
*/ - return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS - - MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) / + return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS - MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) / (32 /* uniform buffer, 32 due to potential space wasted on alignment */ + 32 /* storage buffer, 32 due to potential space wasted on alignment */ + - 32 /* sampler, largest when combined with image */ + 64 /* sampled image */ + - 64 /* storage image */); + 32 /* sampler, largest when combined with image */ + 64 /* sampled image */ + 64 /* storage image */); } static uint32_t @@ -1074,8 +1050,7 @@ radv_uniform_buffer_offset_alignment(const struct radv_physical_device *pdevice) } VKAPI_ATTR void VKAPI_CALL -radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, - VkPhysicalDeviceProperties *pProperties) +radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties *pProperties) { RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); VkSampleCountFlags sample_counts = 0xf; @@ -1219,8 +1194,7 @@ radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, } static void -radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice, - VkPhysicalDeviceVulkan11Properties *p) +radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice, VkPhysicalDeviceVulkan11Properties *p) { assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES); @@ -1238,11 +1212,10 @@ radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice, if (radv_enable_rt(pdevice, true)) p->subgroupSupportedStages |= RADV_RT_STAGE_BITS; - p->subgroupSupportedOperations = - VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT | - VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT | - VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT | - VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT; + p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT | + VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT | + VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT | + VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT; p->subgroupQuadOperationsInAllStages = true; p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES; @@ -1276,8 +1249,7 @@ radv_get_compiler_string(struct radv_physical_device *pdevice) } static void -radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice, - VkPhysicalDeviceVulkan12Properties *p) +radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice, VkPhysicalDeviceVulkan12Properties *p) { assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES); @@ -1335,8 +1307,7 @@ radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice, p->shaderRoundingModeRTZFloat32 = !pdevice->use_llvm; p->shaderSignedZeroInfNanPreserveFloat32 = true; - p->shaderDenormFlushToZeroFloat16 = - pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm; + p->shaderDenormFlushToZeroFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm; p->shaderDenormPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit; p->shaderRoundingModeRTEFloat16 = pdevice->rad_info.has_packed_math_16bit; p->shaderRoundingModeRTZFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm; @@ -1376,8 +1347,8 @@ 
radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice, p->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size; /* We support all of the depth resolve modes */ - p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_AVERAGE_BIT | - VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT; + p->supportedDepthResolveModes = + VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_AVERAGE_BIT | VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT; /* Average doesn't make sense for stencil so we don't support that */ p->supportedStencilResolveModes = @@ -1396,8 +1367,7 @@ radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice, } static void -radv_get_physical_device_properties_1_3(struct radv_physical_device *pdevice, - VkPhysicalDeviceVulkan13Properties *p) +radv_get_physical_device_properties_1_3(struct radv_physical_device *pdevice, VkPhysicalDeviceVulkan13Properties *p) { assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES); @@ -1413,8 +1383,7 @@ radv_get_physical_device_properties_1_3(struct radv_physical_device *pdevice, p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE; p->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS; - p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = - MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS; + p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS; p->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT; p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT; p->maxInlineUniformTotalSize = UINT16_MAX; @@ -1441,8 +1410,7 @@ radv_get_physical_device_properties_1_3(struct radv_physical_device *pdevice, p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = accel && gfx11plus; p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = accel; p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = accel; - p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = - accel && gfx11plus; + p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = accel && gfx11plus; p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = accel && !gfx11plus; p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = accel && !gfx11plus; p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false; @@ -1462,8 +1430,7 @@ radv_get_physical_device_properties_1_3(struct radv_physical_device *pdevice, } VKAPI_ATTR void VKAPI_CALL -radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, - VkPhysicalDeviceProperties2 *pProperties) +radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties2 *pProperties) { RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties); @@ -1483,8 +1450,7 @@ radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, }; radv_get_physical_device_properties_1_3(pdevice, &core_1_3); - vk_foreach_struct(ext, pProperties->pNext) - { + vk_foreach_struct (ext, pProperties->pNext) { if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1)) continue; if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2)) @@ -1494,8 +1460,7 @@ radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, switch (ext->sType) { case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: { - VkPhysicalDevicePushDescriptorPropertiesKHR *properties = - (VkPhysicalDevicePushDescriptorPropertiesKHR *)ext; + VkPhysicalDevicePushDescriptorPropertiesKHR *properties = (VkPhysicalDevicePushDescriptorPropertiesKHR *)ext; properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS; break; } @@ -1512,8 +1477,7 @@ radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: { - VkPhysicalDeviceShaderCorePropertiesAMD *properties = - (VkPhysicalDeviceShaderCorePropertiesAMD *)ext; + VkPhysicalDeviceShaderCorePropertiesAMD *properties = (VkPhysicalDeviceShaderCorePropertiesAMD *)ext; /* Shader engines. */ properties->shaderEngineCount = pdevice->rad_info.max_se; @@ -1537,8 +1501,7 @@ radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: { - VkPhysicalDeviceShaderCoreProperties2AMD *properties = - (VkPhysicalDeviceShaderCoreProperties2AMD *)ext; + VkPhysicalDeviceShaderCoreProperties2AMD *properties = (VkPhysicalDeviceShaderCoreProperties2AMD *)ext; properties->shaderCoreFeatures = 0; properties->activeComputeUnitCount = pdevice->rad_info.num_cu; @@ -1566,8 +1529,7 @@ radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, } #ifndef _WIN32 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: { - VkPhysicalDevicePCIBusInfoPropertiesEXT *properties = - (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext; + VkPhysicalDevicePCIBusInfoPropertiesEXT *properties = (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext; properties->pciDomain = pdevice->bus_info.domain; properties->pciBus = pdevice->bus_info.bus; properties->pciDevice = pdevice->bus_info.dev; @@ -1591,10 +1553,8 @@ radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: { - VkPhysicalDeviceSampleLocationsPropertiesEXT *properties = - (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext; - properties->sampleLocationSampleCounts = - VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT; + VkPhysicalDeviceSampleLocationsPropertiesEXT *properties = (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext; + properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT; properties->maxSampleLocationGridSize = (VkExtent2D){2, 2}; properties->sampleLocationCoordinateRange[0] = 0.0f; properties->sampleLocationCoordinateRange[1] = 0.9375f; @@ -1603,21 +1563,18 @@ radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: { - VkPhysicalDeviceLineRasterizationPropertiesEXT *props = - (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext; + VkPhysicalDeviceLineRasterizationPropertiesEXT *props = (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext; props->lineSubPixelPrecisionBits = 4; break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: { - VkPhysicalDeviceRobustness2PropertiesEXT *properties = - (VkPhysicalDeviceRobustness2PropertiesEXT *)ext; + VkPhysicalDeviceRobustness2PropertiesEXT *properties = (VkPhysicalDeviceRobustness2PropertiesEXT *)ext; properties->robustStorageBufferAccessSizeAlignment = 4; properties->robustUniformBufferAccessSizeAlignment = 4; break; } case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: { - VkPhysicalDeviceCustomBorderColorPropertiesEXT *props = - (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext; + VkPhysicalDeviceCustomBorderColorPropertiesEXT *props = (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext; props->maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT; break; } @@ -1639,8 +1596,7 @@ radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, props->maxFragmentSizeAspectRatio = 2; props->maxFragmentShadingRateCoverageSamples = 32; props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_8_BIT; - props->fragmentShadingRateWithShaderDepthStencilWrites = - !pdevice->rad_info.has_vrs_ds_export_bug; + props->fragmentShadingRateWithShaderDepthStencilWrites = !pdevice->rad_info.has_vrs_ds_export_bug; props->fragmentShadingRateWithSampleMask = true; props->fragmentShadingRateWithShaderSampleMask = false; props->fragmentShadingRateWithConservativeRasterization = true; @@ -1650,8 +1606,7 @@ radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: { - VkPhysicalDeviceProvokingVertexPropertiesEXT *props = - (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext; + VkPhysicalDeviceProvokingVertexPropertiesEXT *props = (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext; props->provokingVertexModePerPipeline = true; props->transformFeedbackPreservesTriangleFanProvokingVertex = true; break; @@ -1666,8 +1621,7 @@ radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, pProperties->properties.limits.maxPerStageDescriptorStorageBuffers; props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures = pProperties->properties.limits.maxPerStageDescriptorStorageBuffers; - props->maxDescriptorSetAccelerationStructures = - pProperties->properties.limits.maxDescriptorSetStorageBuffers; + props->maxDescriptorSetAccelerationStructures = pProperties->properties.limits.maxDescriptorSetStorageBuffers; props->maxDescriptorSetUpdateAfterBindAccelerationStructures = pProperties->properties.limits.maxDescriptorSetStorageBuffers; props->minAccelerationStructureScratchOffsetAlignment = 128; @@ -1694,8 +1648,7 @@ radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, } #endif case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: { - VkPhysicalDeviceMultiDrawPropertiesEXT *props = - (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext; + VkPhysicalDeviceMultiDrawPropertiesEXT *props = (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext; props->maxMultiDrawCount = 2048; break; } @@ -1715,8 +1668,7 @@ radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES: { - VkPhysicalDeviceMaintenance4Properties *properties = - (VkPhysicalDeviceMaintenance4Properties *)ext; + VkPhysicalDeviceMaintenance4Properties *properties = (VkPhysicalDeviceMaintenance4Properties *)ext; properties->maxBufferSize = RADV_MAX_MEMORY_ALLOCATION_SIZE; break; } @@ -1725,8 +1677,7 @@ radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, (VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *)ext; STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) == sizeof(properties->shaderModuleIdentifierAlgorithmUUID)); - memcpy(properties->shaderModuleIdentifierAlgorithmUUID, - vk_shaderModuleIdentifierAlgorithmUUID, + memcpy(properties->shaderModuleIdentifierAlgorithmUUID, vk_shaderModuleIdentifierAlgorithmUUID, 
sizeof(properties->shaderModuleIdentifierAlgorithmUUID)); break; } @@ -1763,8 +1714,7 @@ radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_EXT: { - VkPhysicalDeviceMeshShaderPropertiesEXT *properties = - (VkPhysicalDeviceMeshShaderPropertiesEXT *)ext; + VkPhysicalDeviceMeshShaderPropertiesEXT *properties = (VkPhysicalDeviceMeshShaderPropertiesEXT *)ext; properties->maxTaskWorkGroupTotalCount = 4194304; /* 2^22 min required */ properties->maxTaskWorkGroupCount[0] = 65535; @@ -1789,12 +1739,10 @@ radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, properties->maxMeshOutputMemorySize = 32 * 1024; /* 32K min required */ properties->maxMeshSharedMemorySize = 28672; /* 28K min required */ properties->maxMeshPayloadAndSharedMemorySize = - properties->maxTaskPayloadSize + - properties->maxMeshSharedMemorySize; /* 28K min required */ + properties->maxTaskPayloadSize + properties->maxMeshSharedMemorySize; /* 28K min required */ properties->maxMeshPayloadAndOutputMemorySize = - properties->maxTaskPayloadSize + - properties->maxMeshOutputMemorySize; /* 47K min required */ - properties->maxMeshOutputComponents = 128; /* 32x vec4 min required */ + properties->maxTaskPayloadSize + properties->maxMeshOutputMemorySize; /* 47K min required */ + properties->maxMeshOutputComponents = 128; /* 32x vec4 min required */ properties->maxMeshOutputVertices = 256; properties->maxMeshOutputPrimitives = 256; properties->maxMeshOutputLayers = 8; @@ -1884,8 +1832,7 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm fd = open(path, O_RDWR | O_CLOEXEC); if (fd < 0) { - return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, "Could not open device %s: %m", - path); + return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, "Could not open device %s: %m", path); } version = drmGetVersion(fd); @@ -1910,18 +1857,16 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm } #endif - struct radv_physical_device *device = vk_zalloc2(&instance->vk.alloc, NULL, sizeof(*device), 8, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + struct radv_physical_device *device = + vk_zalloc2(&instance->vk.alloc, NULL, sizeof(*device), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); if (!device) { result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); goto fail_fd; } struct vk_physical_device_dispatch_table dispatch_table; - vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, - &radv_physical_device_entrypoints, true); - vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, - &wsi_physical_device_entrypoints, false); + vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_physical_device_entrypoints, true); + vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, &wsi_physical_device_entrypoints, false); result = vk_physical_device_init(&device->vk, &instance->vk, NULL, NULL, &dispatch_table); if (result != VK_SUCCESS) { @@ -1936,8 +1881,7 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm if (drm_device) { bool reserve_vmid = radv_sqtt_enabled(); - device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags, - reserve_vmid); + device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags, reserve_vmid); } else { device->ws = radv_null_winsys_create(); } @@ -1959,8 +1903,7 @@ radv_physical_device_try_create(struct radv_instance *instance, 
drmDevicePtr drm .return_size = sizeof(accel_working), .query = AMDGPU_INFO_ACCEL_WORKING}; - if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)) < - 0 || + if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)) < 0 || !accel_working) { close(master_fd); master_fd = -1; @@ -1985,8 +1928,8 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm #ifdef ANDROID device->emulate_etc2 = !radv_device_supports_etc(device); #else - device->emulate_etc2 = !radv_device_supports_etc(device) && - driQueryOptionb(&device->instance->dri_options, "radv_require_etc2"); + device->emulate_etc2 = + !radv_device_supports_etc(device) && driQueryOptionb(&device->instance->dri_options, "radv_require_etc2"); #endif snprintf(device->name, sizeof(device->name), "AMD RADV %s%s", device->rad_info.name, @@ -1994,8 +1937,7 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm const char *marketing_name = device->ws->get_chip_name(device->ws); snprintf(device->marketing_name, sizeof(device->name), "%s (RADV %s%s)", - marketing_name ? marketing_name : "AMD Unknown", device->rad_info.name, - radv_get_compiler_string(device)); + marketing_name ? marketing_name : "AMD Unknown", device->rad_info.name, radv_get_compiler_string(device)); if (radv_device_get_cache_uuid(device, device->cache_uuid)) { result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "cannot generate UUID"); @@ -2017,26 +1959,22 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm device->dcc_msaa_allowed = (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA); - device->use_fmask = - device->rad_info.gfx_level < GFX11 && !(device->instance->debug_flags & RADV_DEBUG_NO_FMASK); + device->use_fmask = device->rad_info.gfx_level < GFX11 && !(device->instance->debug_flags & RADV_DEBUG_NO_FMASK); - device->use_ngg = - (device->rad_info.gfx_level >= GFX10 && device->rad_info.family != CHIP_NAVI14 && - !(device->instance->debug_flags & RADV_DEBUG_NO_NGG)) || - device->rad_info.gfx_level >= GFX11; + device->use_ngg = (device->rad_info.gfx_level >= GFX10 && device->rad_info.family != CHIP_NAVI14 && + !(device->instance->debug_flags & RADV_DEBUG_NO_NGG)) || + device->rad_info.gfx_level >= GFX11; /* TODO: Investigate if NGG culling helps on GFX11. */ - device->use_ngg_culling = device->use_ngg && device->rad_info.max_render_backends > 1 && - (device->rad_info.gfx_level == GFX10_3 || - (device->instance->perftest_flags & RADV_PERFTEST_NGGC)) && - !(device->instance->debug_flags & RADV_DEBUG_NO_NGGC); + device->use_ngg_culling = + device->use_ngg && device->rad_info.max_render_backends > 1 && + (device->rad_info.gfx_level == GFX10_3 || (device->instance->perftest_flags & RADV_PERFTEST_NGGC)) && + !(device->instance->debug_flags & RADV_DEBUG_NO_NGGC); - device->use_ngg_streamout = - device->use_ngg && (device->rad_info.gfx_level >= GFX11 || - (device->instance->perftest_flags & RADV_PERFTEST_NGG_STREAMOUT)); + device->use_ngg_streamout = device->use_ngg && (device->rad_info.gfx_level >= GFX11 || + (device->instance->perftest_flags & RADV_PERFTEST_NGG_STREAMOUT)); - device->emulate_ngg_gs_query_pipeline_stat = - device->use_ngg && device->rad_info.gfx_level < GFX11; + device->emulate_ngg_gs_query_pipeline_stat = device->use_ngg && device->rad_info.gfx_level < GFX11; /* Determine the number of threads per wave for all stages. 
*/ device->cs_wave_size = 64; @@ -2060,8 +1998,8 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm * dependence wave32 would likely be a net-loss (as well as the SALU count becoming more * problematic) */ - if (!(device->instance->perftest_flags & RADV_PERFTEST_RT_WAVE_64) && - !(device->instance->force_rt_wave64) && device->rad_info.gfx_level < GFX11) + if (!(device->instance->perftest_flags & RADV_PERFTEST_RT_WAVE_64) && !(device->instance->force_rt_wave64) && + device->rad_info.gfx_level < GFX11) device->rt_wave_size = 32; } @@ -2083,18 +2021,16 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm if ((drm_device->available_nodes & (1 << DRM_NODE_PRIMARY)) && stat(drm_device->nodes[DRM_NODE_PRIMARY], &primary_stat) != 0) { - result = - vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, - "failed to stat DRM primary node %s", drm_device->nodes[DRM_NODE_PRIMARY]); + result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to stat DRM primary node %s", + drm_device->nodes[DRM_NODE_PRIMARY]); goto fail_perfcounters; } device->primary_devid = primary_stat.st_rdev; if ((drm_device->available_nodes & (1 << DRM_NODE_RENDER)) && stat(drm_device->nodes[DRM_NODE_RENDER], &render_stat) != 0) { - result = - vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to stat DRM render node %s", - drm_device->nodes[DRM_NODE_RENDER]); + result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to stat DRM render node %s", + drm_device->nodes[DRM_NODE_RENDER]); goto fail_perfcounters; } device->render_devid = render_stat.st_rdev; @@ -2121,8 +2057,7 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm goto fail_perfcounters; } - device->gs_table_depth = - ac_get_gs_table_depth(device->rad_info.gfx_level, device->rad_info.family); + device->gs_table_depth = ac_get_gs_table_depth(device->rad_info.gfx_level, device->rad_info.family); ac_get_hs_info(&device->rad_info, &device->hs); ac_get_task_info(&device->rad_info, &device->task_info); @@ -2164,8 +2099,7 @@ create_null_physical_device(struct vk_instance *vk_instance) } VkResult -create_drm_physical_device(struct vk_instance *vk_instance, struct _drmDevice *device, - struct vk_physical_device **out) +create_drm_physical_device(struct vk_instance *vk_instance, struct _drmDevice *device, struct vk_physical_device **out) { #ifndef _WIN32 if (!(device->available_nodes & (1 << DRM_NODE_RENDER)) || device->bustype != DRM_BUS_PCI || @@ -2197,8 +2131,7 @@ radv_physical_device_destroy(struct vk_physical_device *vk_device) } static void -radv_get_physical_device_queue_family_properties(struct radv_physical_device *pdevice, - uint32_t *pCount, +radv_get_physical_device_queue_family_properties(struct radv_physical_device *pdevice, uint32_t *pCount, VkQueueFamilyProperties **pQueueFamilyProperties) { int num_queue_families = 1; @@ -2223,8 +2156,8 @@ radv_get_physical_device_queue_family_properties(struct radv_physical_device *pd idx = 0; if (*pCount >= 1) { *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){ - .queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | - VK_QUEUE_SPARSE_BINDING_BIT, + .queueFlags = + VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT, .queueCount = 1, .timestampValidBits = 64, .minImageTransferGranularity = (VkExtent3D){1, 1, 1}, @@ -2236,8 +2169,7 @@ radv_get_physical_device_queue_family_properties(struct radv_physical_device *pd 
!(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) { if (*pCount > idx) { *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){ - .queueFlags = - VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT, + .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT, .queueCount = pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues, .timestampValidBits = 64, .minImageTransferGranularity = (VkExtent3D){1, 1, 1}, @@ -2288,31 +2220,25 @@ radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, ui assert(*pCount <= 3); for (uint32_t i = 0; i < *pCount; i++) { - vk_foreach_struct(ext, pQueueFamilyProperties[i].pNext) - { + vk_foreach_struct (ext, pQueueFamilyProperties[i].pNext) { switch (ext->sType) { case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: { - VkQueueFamilyGlobalPriorityPropertiesKHR *prop = - (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext; - STATIC_ASSERT(ARRAY_SIZE(radv_global_queue_priorities) <= - VK_MAX_GLOBAL_PRIORITY_SIZE_KHR); + VkQueueFamilyGlobalPriorityPropertiesKHR *prop = (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext; + STATIC_ASSERT(ARRAY_SIZE(radv_global_queue_priorities) <= VK_MAX_GLOBAL_PRIORITY_SIZE_KHR); prop->priorityCount = ARRAY_SIZE(radv_global_queue_priorities); - memcpy(&prop->priorities, radv_global_queue_priorities, - sizeof(radv_global_queue_priorities)); + memcpy(&prop->priorities, radv_global_queue_priorities, sizeof(radv_global_queue_priorities)); break; } case VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR: { - VkQueueFamilyQueryResultStatusPropertiesKHR *prop = - (VkQueueFamilyQueryResultStatusPropertiesKHR *)ext; + VkQueueFamilyQueryResultStatusPropertiesKHR *prop = (VkQueueFamilyQueryResultStatusPropertiesKHR *)ext; prop->queryResultStatusSupport = VK_FALSE; break; } case VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR: { VkQueueFamilyVideoPropertiesKHR *prop = (VkQueueFamilyVideoPropertiesKHR *)ext; - if (pQueueFamilyProperties[i].queueFamilyProperties.queueFlags & - VK_QUEUE_VIDEO_DECODE_BIT_KHR) - prop->videoCodecOperations = VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR; + if (pQueueFamilyProperties[i].queueFamilyProperties.queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) + prop->videoCodecOperations = + VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR; break; } default: @@ -2349,9 +2275,8 @@ radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice, uint64_t total_heap_size = device->memory_properties.memoryHeaps[vram_vis_heap_idx].size; /* Get the different memory usages. */ - uint64_t vram_vis_internal_usage = - device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS) + - device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM); + uint64_t vram_vis_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS) + + device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM); uint64_t gtt_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT); uint64_t total_internal_usage = vram_vis_internal_usage + gtt_internal_usage; uint64_t total_system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) + @@ -2374,12 +2299,10 @@ radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice, /* Get the visible VRAM/GTT heap sizes and internal usages. 
*/ uint64_t gtt_heap_size = device->memory_properties.memoryHeaps[gtt_heap_idx].size; - uint64_t vram_vis_heap_size = - device->memory_properties.memoryHeaps[vram_vis_heap_idx].size; + uint64_t vram_vis_heap_size = device->memory_properties.memoryHeaps[vram_vis_heap_idx].size; - uint64_t vram_vis_internal_usage = - device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS) + - device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM); + uint64_t vram_vis_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS) + + device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM); uint64_t gtt_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT); /* Compute the total heap size, internal and system usage. */ @@ -2394,18 +2317,16 @@ radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice, uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage); /* Compute the remaining visible VRAM size for this process. */ - uint64_t vram_vis_free_space = - vram_vis_heap_size - MIN2(vram_vis_heap_size, vram_vis_internal_usage); + uint64_t vram_vis_free_space = vram_vis_heap_size - MIN2(vram_vis_heap_size, vram_vis_internal_usage); /* Distribute the total free space (2/3rd as VRAM and 1/3rd as GTT) to match the heap * sizes, and align down to the page size to be conservative. */ - vram_vis_free_space = ROUND_DOWN_TO(MIN2((total_free_space * 2) / 3, vram_vis_free_space), - device->rad_info.gart_page_size); + vram_vis_free_space = + ROUND_DOWN_TO(MIN2((total_free_space * 2) / 3, vram_vis_free_space), device->rad_info.gart_page_size); uint64_t gtt_free_space = total_free_space - vram_vis_free_space; - memoryBudget->heapBudget[vram_vis_heap_idx] = - vram_vis_free_space + vram_vis_internal_usage; + memoryBudget->heapBudget[vram_vis_heap_idx] = vram_vis_free_space + vram_vis_internal_usage; memoryBudget->heapUsage[vram_vis_heap_idx] = vram_vis_internal_usage; memoryBudget->heapBudget[gtt_heap_idx] = gtt_free_space + gtt_internal_usage; memoryBudget->heapUsage[gtt_heap_idx] = gtt_internal_usage; @@ -2479,8 +2400,7 @@ static const VkTimeDomainEXT radv_time_domains[] = { }; VKAPI_ATTR VkResult VKAPI_CALL -radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(VkPhysicalDevice physicalDevice, - uint32_t *pTimeDomainCount, +radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(VkPhysicalDevice physicalDevice, uint32_t *pTimeDomainCount, VkTimeDomainEXT *pTimeDomains) { int d; @@ -2497,12 +2417,10 @@ radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(VkPhysicalDevice physicalDevic } VKAPI_ATTR void VKAPI_CALL -radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice, - VkSampleCountFlagBits samples, +radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice, VkSampleCountFlagBits samples, VkMultisamplePropertiesEXT *pMultisampleProperties) { - VkSampleCountFlagBits supported_samples = - VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT; + VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT; if (samples & supported_samples) { pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){2, 2}; @@ -2512,21 +2430,20 @@ radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice, } VKAPI_ATTR VkResult VKAPI_CALL -radv_GetPhysicalDeviceFragmentShadingRatesKHR( - VkPhysicalDevice physicalDevice, uint32_t *pFragmentShadingRateCount, - VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates) 
+radv_GetPhysicalDeviceFragmentShadingRatesKHR(VkPhysicalDevice physicalDevice, uint32_t *pFragmentShadingRateCount, + VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates) { VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out, pFragmentShadingRates, pFragmentShadingRateCount); -#define append_rate(w, h, s) \ - { \ - VkPhysicalDeviceFragmentShadingRateKHR rate = { \ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR, \ - .sampleCounts = s, \ - .fragmentSize = {.width = w, .height = h}, \ - }; \ - vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, r) *r = rate; \ +#define append_rate(w, h, s) \ + { \ + VkPhysicalDeviceFragmentShadingRateKHR rate = { \ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR, \ + .sampleCounts = s, \ + .fragmentSize = {.width = w, .height = h}, \ + }; \ + vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, r) *r = rate; \ } for (uint32_t x = 2; x >= 1; x--) { @@ -2536,8 +2453,7 @@ radv_GetPhysicalDeviceFragmentShadingRatesKHR( if (x == 1 && y == 1) { samples = ~0; } else { - samples = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | - VK_SAMPLE_COUNT_8_BIT; + samples = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT; } append_rate(x, y, samples); diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 9d23c578..eae6ff9 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -71,13 +71,11 @@ bool radv_pipeline_capture_shader_stats(const struct radv_device *device, VkPipelineCreateFlags flags) { return (flags & VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR) || - (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) || - device->keep_shader_info; + (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) || device->keep_shader_info; } void -radv_pipeline_init(struct radv_device *device, struct radv_pipeline *pipeline, - enum radv_pipeline_type type) +radv_pipeline_init(struct radv_device *device, struct radv_pipeline *pipeline, enum radv_pipeline_type type) { vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE); @@ -114,8 +112,7 @@ radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline } VKAPI_ATTR void VKAPI_CALL -radv_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, - const VkAllocationCallbacks *pAllocator) +radv_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, const VkAllocationCallbacks *pAllocator) { RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); @@ -137,12 +134,10 @@ radv_pipeline_init_scratch(const struct radv_device *device, struct radv_pipelin if (pipeline->shaders[i] && (pipeline->shaders[i]->config.scratch_bytes_per_wave || is_rt)) { unsigned max_stage_waves = device->scratch_waves; - scratch_bytes_per_wave = - MAX2(scratch_bytes_per_wave, pipeline->shaders[i]->config.scratch_bytes_per_wave); + scratch_bytes_per_wave = MAX2(scratch_bytes_per_wave, pipeline->shaders[i]->config.scratch_bytes_per_wave); - max_stage_waves = - MIN2(max_stage_waves, 4 * device->physical_device->rad_info.num_cu * - radv_get_max_waves(device, pipeline->shaders[i], i)); + max_stage_waves = MIN2(max_stage_waves, 4 * device->physical_device->rad_info.num_cu * + radv_get_max_waves(device, pipeline->shaders[i], i)); max_waves = MAX2(max_waves, max_stage_waves); } } @@ -162,11 +157,10 @@ 
radv_generate_pipeline_key(const struct radv_device *device, const struct radv_p if (flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT) key.optimisations_disabled = 1; - key.disable_aniso_single_level = device->instance->disable_aniso_single_level && - device->physical_device->rad_info.gfx_level < GFX8; + key.disable_aniso_single_level = + device->instance->disable_aniso_single_level && device->physical_device->rad_info.gfx_level < GFX8; - key.image_2d_view_of_3d = - device->image_2d_view_of_3d && device->physical_device->rad_info.gfx_level == GFX9; + key.image_2d_view_of_3d = device->image_2d_view_of_3d && device->physical_device->rad_info.gfx_level == GFX9; key.tex_non_uniform = device->instance->tex_non_uniform; @@ -210,11 +204,10 @@ radv_get_hash_flags(const struct radv_device *device, bool stats) } void -radv_pipeline_stage_init(const VkPipelineShaderStageCreateInfo *sinfo, - struct radv_pipeline_stage *out_stage, gl_shader_stage stage) +radv_pipeline_stage_init(const VkPipelineShaderStageCreateInfo *sinfo, struct radv_pipeline_stage *out_stage, + gl_shader_stage stage) { - const VkShaderModuleCreateInfo *minfo = - vk_find_struct_const(sinfo->pNext, SHADER_MODULE_CREATE_INFO); + const VkShaderModuleCreateInfo *minfo = vk_find_struct_const(sinfo->pNext, SHADER_MODULE_CREATE_INFO); const VkPipelineShaderStageModuleIdentifierCreateInfoEXT *iinfo = vk_find_struct_const(sinfo->pNext, PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT); @@ -251,8 +244,7 @@ ycbcr_conversion_lookup(const void *data, uint32_t set, uint32_t binding, uint32 const struct radv_pipeline_layout *layout = data; const struct radv_descriptor_set_layout *set_layout = layout->set[set].layout; - const struct vk_ycbcr_conversion_state *ycbcr_samplers = - radv_immutable_ycbcr_samplers(set_layout, binding); + const struct vk_ycbcr_conversion_state *ycbcr_samplers = radv_immutable_ycbcr_samplers(set_layout, binding); if (!ycbcr_samplers) return NULL; @@ -261,9 +253,8 @@ ycbcr_conversion_lookup(const void *data, uint32_t set, uint32_t binding, uint32 } bool -radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size, - unsigned num_components, nir_intrinsic_instr *low, - nir_intrinsic_instr *high, void *data) +radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size, unsigned num_components, + nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data) { if (num_components > 4) return false; @@ -300,8 +291,7 @@ radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned FALLTHROUGH; case nir_intrinsic_load_shared: case nir_intrinsic_store_shared: - if (bit_size * num_components == - 96) { /* 96 bit loads require 128 bit alignment and are split otherwise */ + if (bit_size * num_components == 96) { /* 96 bit loads require 128 bit alignment and are split otherwise */ return align % 16 == 0; } else if (bit_size == 16 && (align % 4)) { /* AMD hardware can't do 2-byte aligned f16vec2 loads, but they are useful for ALU @@ -359,8 +349,7 @@ lower_bit_size_callback(const nir_instr *instr, void *_) case nir_op_isign: case nir_op_uadd_sat: case nir_op_usub_sat: - return (bit_size == 8 || !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32 - : 0; + return (bit_size == 8 || !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32 : 0; case nir_op_iadd_sat: case nir_op_isub_sat: return bit_size == 8 || !nir_dest_is_divergent(alu->dest.dest) ? 
32 : 0; @@ -383,8 +372,7 @@ lower_bit_size_callback(const nir_instr *instr, void *_) case nir_op_ine: case nir_op_ult: case nir_op_uge: - return (bit_size == 8 || !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32 - : 0; + return (bit_size == 8 || !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32 : 0; default: return 0; } @@ -470,8 +458,8 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_layo } enum nir_lower_non_uniform_access_type lower_non_uniform_access_types = - nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_ssbo_access | - nir_lower_non_uniform_texture_access | nir_lower_non_uniform_image_access; + nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_ssbo_access | nir_lower_non_uniform_texture_access | + nir_lower_non_uniform_image_access; /* In practice, most shaders do not have non-uniform-qualified * accesses (see @@ -495,8 +483,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_layo NIR_PASS(_, stage->nir, nir_lower_memory_model); nir_load_store_vectorize_options vectorize_opts = { - .modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_push_const | nir_var_mem_shared | - nir_var_mem_global, + .modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_push_const | nir_var_mem_shared | nir_var_mem_global, .callback = radv_mem_vectorize_callback, .robust_modes = 0, /* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if @@ -514,8 +501,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_layo NIR_PASS(progress, stage->nir, nir_opt_load_store_vectorize, &vectorize_opts); if (progress) { NIR_PASS(_, stage->nir, nir_copy_prop); - NIR_PASS(_, stage->nir, nir_opt_shrink_stores, - !device->instance->disable_shrink_image_store); + NIR_PASS(_, stage->nir, nir_opt_shrink_stores, !device->instance->disable_shrink_image_store); /* Gather info again, to update whether 8/16-bit are used. */ nir_shader_gather_info(stage->nir, nir_shader_get_entrypoint(stage->nir)); @@ -551,8 +537,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_layo if (stage->nir->info.uses_resource_info_query) NIR_PASS(_, stage->nir, ac_nir_lower_resinfo, gfx_level); - NIR_PASS_V(stage->nir, radv_nir_apply_pipeline_layout, device, pipeline_layout, &stage->info, - &stage->args); + NIR_PASS_V(stage->nir, radv_nir_apply_pipeline_layout, device, pipeline_layout, &stage->info, &stage->args); if (!pipeline_key->optimisations_disabled) { NIR_PASS(_, stage->nir, nir_opt_shrink_vectors); @@ -570,16 +555,14 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_layo sink_opts |= nir_move_load_input; NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts); - NIR_PASS(_, stage->nir, nir_opt_move, - nir_move_load_input | nir_move_const_undef | nir_move_copies); + NIR_PASS(_, stage->nir, nir_opt_move, nir_move_load_input | nir_move_const_undef | nir_move_copies); } /* Lower VS inputs. We need to do this after nir_opt_sink, because * load_input can be reordered, but buffer loads can't. */ if (stage->stage == MESA_SHADER_VERTEX) { - NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, stage, pipeline_key, - &device->physical_device->rad_info); + NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, stage, pipeline_key, &device->physical_device->rad_info); } /* Lower I/O intrinsics to memory instructions. 
*/ @@ -592,8 +575,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_layo NIR_PASS_V(stage->nir, ac_nir_lower_legacy_vs, gfx_level, stage->info.outinfo.clip_dist_mask | stage->info.outinfo.cull_dist_mask, stage->info.outinfo.vs_output_param_offset, stage->info.outinfo.param_exports, - stage->info.outinfo.export_prim_id, false, false, - stage->info.force_vrs_per_vertex); + stage->info.outinfo.export_prim_id, false, false, stage->info.force_vrs_per_vertex); } else { ac_nir_gs_output_info gs_out_info = { @@ -614,11 +596,9 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_layo * present. */ .alpha_to_coverage_via_mrtz = stage->info.ps.writes_mrt0_alpha, - .dual_src_blend_swizzle = - pipeline_key->ps.epilog.mrt0_is_dual_src && gfx_level >= GFX11, + .dual_src_blend_swizzle = pipeline_key->ps.epilog.mrt0_is_dual_src && gfx_level >= GFX11, /* Need to filter out unwritten color slots. */ - .spi_shader_col_format = - pipeline_key->ps.epilog.spi_shader_col_format & stage->info.ps.colors_written, + .spi_shader_col_format = pipeline_key->ps.epilog.spi_shader_col_format & stage->info.ps.colors_written, .color_is_int8 = pipeline_key->ps.epilog.color_is_int8, .color_is_int10 = pipeline_key->ps.epilog.color_is_int10, .alpha_func = COMPARE_FUNC_ALWAYS, @@ -648,9 +628,8 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_layo NIR_PASS(_, stage->nir, ac_nir_lower_global_access); NIR_PASS_V(stage->nir, radv_nir_lower_abi, gfx_level, &stage->info, &stage->args, pipeline_key, device->physical_device->rad_info.address32_hi); - radv_optimize_nir_algebraic(stage->nir, io_to_mem || lowered_ngg || - stage->stage == MESA_SHADER_COMPUTE || - stage->stage == MESA_SHADER_TASK); + radv_optimize_nir_algebraic( + stage->nir, io_to_mem || lowered_ngg || stage->stage == MESA_SHADER_COMPUTE || stage->stage == MESA_SHADER_TASK); if (stage->nir->info.bit_sizes_int & (8 | 16)) { if (gfx_level >= GFX8) { @@ -665,16 +644,13 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_layo if (gfx_level >= GFX8) NIR_PASS(_, stage->nir, nir_opt_remove_phis); /* cleanup LCSSA phis */ } - if (((stage->nir->info.bit_sizes_int | stage->nir->info.bit_sizes_float) & 16) && - gfx_level >= GFX9) { + if (((stage->nir->info.bit_sizes_int | stage->nir->info.bit_sizes_float) & 16) && gfx_level >= GFX9) { bool separate_g16 = gfx_level >= GFX10; struct nir_fold_tex_srcs_options fold_srcs_options[] = { { - .sampler_dims = - ~(BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) | BITFIELD_BIT(GLSL_SAMPLER_DIM_BUF)), - .src_types = (1 << nir_tex_src_coord) | (1 << nir_tex_src_lod) | - (1 << nir_tex_src_bias) | (1 << nir_tex_src_min_lod) | - (1 << nir_tex_src_ms_index) | + .sampler_dims = ~(BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) | BITFIELD_BIT(GLSL_SAMPLER_DIM_BUF)), + .src_types = (1 << nir_tex_src_coord) | (1 << nir_tex_src_lod) | (1 << nir_tex_src_bias) | + (1 << nir_tex_src_min_lod) | (1 << nir_tex_src_ms_index) | (separate_g16 ? 
0 : (1 << nir_tex_src_ddx) | (1 << nir_tex_src_ddy)), }, { @@ -708,8 +684,8 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_layo sink_opts |= nir_move_comparisons | nir_move_load_ubo | nir_move_load_ssbo; NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts); - nir_move_options move_opts = nir_move_const_undef | nir_move_load_ubo | nir_move_load_input | - nir_move_comparisons | nir_move_copies; + nir_move_options move_opts = + nir_move_const_undef | nir_move_load_ubo | nir_move_load_input | nir_move_comparisons | nir_move_copies; NIR_PASS(_, stage->nir, nir_opt_move, move_opts); } } @@ -725,8 +701,7 @@ radv_get_executable_count(struct radv_pipeline *pipeline) if (!pipeline->shaders[i]) continue; - if (i == MESA_SHADER_GEOMETRY && - !radv_pipeline_has_ngg(radv_pipeline_to_graphics(pipeline))) { + if (i == MESA_SHADER_GEOMETRY && !radv_pipeline_has_ngg(radv_pipeline_to_graphics(pipeline))) { ret += 2u; } else { ret += 1u; @@ -736,8 +711,7 @@ radv_get_executable_count(struct radv_pipeline *pipeline) } static struct radv_shader * -radv_get_shader_from_executable_index(struct radv_pipeline *pipeline, int index, - gl_shader_stage *stage) +radv_get_shader_from_executable_index(struct radv_pipeline *pipeline, int index, gl_shader_stage *stage) { if (pipeline->type == RADV_PIPELINE_RAY_TRACING) { *stage = MESA_SHADER_INTERSECTION; @@ -754,8 +728,7 @@ radv_get_shader_from_executable_index(struct radv_pipeline *pipeline, int index, --index; - if (i == MESA_SHADER_GEOMETRY && - !radv_pipeline_has_ngg(radv_pipeline_to_graphics(pipeline))) { + if (i == MESA_SHADER_GEOMETRY && !radv_pipeline_has_ngg(radv_pipeline_to_graphics(pipeline))) { if (!index) { *stage = i; return pipeline->gs_copy_shader; @@ -781,8 +754,7 @@ desc_copy(char *desc, const char *src) VKAPI_ATTR VkResult VKAPI_CALL radv_GetPipelineExecutablePropertiesKHR(VkDevice _device, const VkPipelineInfoKHR *pPipelineInfo, - uint32_t *pExecutableCount, - VkPipelineExecutablePropertiesKHR *pProperties) + uint32_t *pExecutableCount, VkPipelineExecutablePropertiesKHR *pProperties) { RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelineInfo->pipeline); const uint32_t total_count = radv_get_executable_count(pipeline); @@ -879,8 +851,7 @@ radv_GetPipelineExecutablePropertiesKHR(VkDevice _device, const VkPipelineInfoKH desc_copy(pProperties[executable_idx].description, description); ++executable_idx; - if (i == MESA_SHADER_GEOMETRY && - !radv_pipeline_has_ngg(radv_pipeline_to_graphics(pipeline))) { + if (i == MESA_SHADER_GEOMETRY && !radv_pipeline_has_ngg(radv_pipeline_to_graphics(pipeline))) { assert(pipeline->gs_copy_shader); if (executable_idx >= count) break; @@ -901,10 +872,8 @@ radv_GetPipelineExecutablePropertiesKHR(VkDevice _device, const VkPipelineInfoKH } VKAPI_ATTR VkResult VKAPI_CALL -radv_GetPipelineExecutableStatisticsKHR(VkDevice _device, - const VkPipelineExecutableInfoKHR *pExecutableInfo, - uint32_t *pStatisticCount, - VkPipelineExecutableStatisticKHR *pStatistics) +radv_GetPipelineExecutableStatisticsKHR(VkDevice _device, const VkPipelineExecutableInfoKHR *pExecutableInfo, + uint32_t *pStatisticCount, VkPipelineExecutableStatisticKHR *pStatistics) { RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline); @@ -915,7 +884,8 @@ radv_GetPipelineExecutableStatisticsKHR(VkDevice _device, const struct radv_physical_device *pdevice = device->physical_device; unsigned lds_increment = pdevice->rad_info.gfx_level >= GFX11 && stage == MESA_SHADER_FRAGMENT - ? 
1024 : pdevice->rad_info.lds_encode_granularity; + ? 1024 + : pdevice->rad_info.lds_encode_granularity; unsigned max_waves = radv_get_max_waves(device, shader, stage); VkPipelineExecutableStatisticKHR *s = pStatistics; @@ -1039,8 +1009,7 @@ radv_copy_representation(void *data, size_t *data_size, const char *src) VKAPI_ATTR VkResult VKAPI_CALL radv_GetPipelineExecutableInternalRepresentationsKHR( - VkDevice _device, const VkPipelineExecutableInfoKHR *pExecutableInfo, - uint32_t *pInternalRepresentationCount, + VkDevice _device, const VkPipelineExecutableInfoKHR *pExecutableInfo, uint32_t *pInternalRepresentationCount, VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations) { RADV_FROM_HANDLE(radv_device, device, _device); @@ -1123,12 +1092,10 @@ radv_copy_shader_stage_create_info(struct radv_device *device, uint32_t stageCou for (uint32_t i = 0; i < stageCount; i++) { RADV_FROM_HANDLE(vk_shader_module, module, new_stages[i].module); - const VkShaderModuleCreateInfo *minfo = - vk_find_struct_const(pStages[i].pNext, SHADER_MODULE_CREATE_INFO); + const VkShaderModuleCreateInfo *minfo = vk_find_struct_const(pStages[i].pNext, SHADER_MODULE_CREATE_INFO); if (module) { - struct vk_shader_module *new_module = - ralloc_size(mem_ctx, sizeof(struct vk_shader_module) + module->size); + struct vk_shader_module *new_module = ralloc_size(mem_ctx, sizeof(struct vk_shader_module) + module->size); if (!new_module) return NULL; diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c index 8e62fa5..6deedf4 100644 --- a/src/amd/vulkan/radv_pipeline_cache.c +++ b/src/amd/vulkan/radv_pipeline_cache.c @@ -45,9 +45,8 @@ radv_is_cache_disabled(struct radv_device *device) } void -radv_hash_shaders(unsigned char *hash, const struct radv_pipeline_stage *stages, - uint32_t stage_count, const struct radv_pipeline_layout *layout, - const struct radv_pipeline_key *key, uint32_t flags) +radv_hash_shaders(unsigned char *hash, const struct radv_pipeline_stage *stages, uint32_t stage_count, + const struct radv_pipeline_layout *layout, const struct radv_pipeline_key *key, uint32_t flags) { struct mesa_sha1 ctx; @@ -68,8 +67,7 @@ radv_hash_shaders(unsigned char *hash, const struct radv_pipeline_stage *stages, } void -radv_hash_rt_stages(struct mesa_sha1 *ctx, const VkPipelineShaderStageCreateInfo *stages, - unsigned stage_count) +radv_hash_rt_stages(struct mesa_sha1 *ctx, const VkPipelineShaderStageCreateInfo *stages, unsigned stage_count) { for (unsigned i = 0; i < stage_count; ++i) { unsigned char hash[20]; @@ -80,8 +78,7 @@ radv_hash_rt_stages(struct mesa_sha1 *ctx, const VkPipelineShaderStageCreateInfo void radv_hash_rt_shaders(unsigned char *hash, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, - const struct radv_pipeline_key *key, - const struct radv_ray_tracing_group *groups, uint32_t flags) + const struct radv_pipeline_key *key, const struct radv_ray_tracing_group *groups, uint32_t flags) { RADV_FROM_HANDLE(radv_pipeline_layout, layout, pCreateInfo->layout); struct mesa_sha1 ctx; @@ -95,12 +92,9 @@ radv_hash_rt_shaders(unsigned char *hash, const VkRayTracingPipelineCreateInfoKH radv_hash_rt_stages(&ctx, pCreateInfo->pStages, pCreateInfo->stageCount); for (uint32_t i = 0; i < pCreateInfo->groupCount; i++) { - _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].type, - sizeof(pCreateInfo->pGroups[i].type)); - _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].generalShader, - sizeof(pCreateInfo->pGroups[i].generalShader)); - _mesa_sha1_update(&ctx, 
&pCreateInfo->pGroups[i].anyHitShader, - sizeof(pCreateInfo->pGroups[i].anyHitShader)); + _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].type, sizeof(pCreateInfo->pGroups[i].type)); + _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].generalShader, sizeof(pCreateInfo->pGroups[i].generalShader)); + _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].anyHitShader, sizeof(pCreateInfo->pGroups[i].anyHitShader)); _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].closestHitShader, sizeof(pCreateInfo->pGroups[i].closestHitShader)); _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].intersectionShader, @@ -117,13 +111,12 @@ radv_hash_rt_shaders(unsigned char *hash, const VkRayTracingPipelineCreateInfoKH } const uint32_t pipeline_flags = - pCreateInfo->flags & (VK_PIPELINE_CREATE_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR | - VK_PIPELINE_CREATE_RAY_TRACING_SKIP_AABBS_BIT_KHR | - VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR | - VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR | - VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR | - VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_INTERSECTION_SHADERS_BIT_KHR | - VK_PIPELINE_CREATE_LIBRARY_BIT_KHR); + pCreateInfo->flags & + (VK_PIPELINE_CREATE_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR | VK_PIPELINE_CREATE_RAY_TRACING_SKIP_AABBS_BIT_KHR | + VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR | + VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR | + VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR | + VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_INTERSECTION_SHADERS_BIT_KHR | VK_PIPELINE_CREATE_LIBRARY_BIT_KHR); _mesa_sha1_update(&ctx, &pipeline_flags, 4); _mesa_sha1_update(&ctx, &flags, 4); @@ -159,8 +152,7 @@ radv_shader_deserialize(struct vk_pipeline_cache *cache, const void *key_data, s struct blob_reader *blob) { struct radv_device *device = container_of(cache->base.device, struct radv_device, vk); - const struct radv_shader_binary *binary = - blob_read_bytes(blob, sizeof(struct radv_shader_binary)); + const struct radv_shader_binary *binary = blob_read_bytes(blob, sizeof(struct radv_shader_binary)); assert(key_size == SHA1_DIGEST_LENGTH); struct radv_shader *shader = radv_shader_create(device, binary); @@ -219,8 +211,8 @@ radv_shader_create_cached(struct radv_device *device, struct vk_pipeline_cache * /* TODO: Skip disk-cache for meta-shaders because they are stored in a different cache file */ struct vk_pipeline_cache_object *shader_obj; - shader_obj = vk_pipeline_cache_create_and_insert_object(cache, hash, SHA1_DIGEST_LENGTH, binary, - binary->total_size, &radv_shader_ops); + shader_obj = vk_pipeline_cache_create_and_insert_object(cache, hash, SHA1_DIGEST_LENGTH, binary, binary->total_size, + &radv_shader_ops); return shader_obj ? 
container_of(shader_obj, struct radv_shader, base) : NULL; } @@ -249,17 +241,14 @@ radv_pipeline_cache_object_create(struct vk_device *device, unsigned num_shaders unsigned num_stack_sizes, unsigned ps_epilog_binary_size) { assert(num_stack_sizes == 0 || ps_epilog_binary_size == 0); - const size_t size = sizeof(struct radv_pipeline_cache_object) + - (num_shaders * sizeof(struct radv_shader *)) + ps_epilog_binary_size + - (num_stack_sizes * sizeof(uint32_t)); + const size_t size = sizeof(struct radv_pipeline_cache_object) + (num_shaders * sizeof(struct radv_shader *)) + + ps_epilog_binary_size + (num_stack_sizes * sizeof(uint32_t)); - struct radv_pipeline_cache_object *object = - vk_alloc(&device->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE); + struct radv_pipeline_cache_object *object = vk_alloc(&device->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE); if (!object) return NULL; - vk_pipeline_cache_object_init(device, &object->base, &radv_pipeline_ops, object->sha1, - SHA1_DIGEST_LENGTH); + vk_pipeline_cache_object_init(device, &object->base, &radv_pipeline_ops, object->sha1, SHA1_DIGEST_LENGTH); object->num_shaders = num_shaders; object->num_stack_sizes = num_stack_sizes; object->ps_epilog_binary_size = ps_epilog_binary_size; @@ -272,12 +261,10 @@ radv_pipeline_cache_object_create(struct vk_device *device, unsigned num_shaders } static void -radv_pipeline_cache_object_destroy(struct vk_device *_device, - struct vk_pipeline_cache_object *object) +radv_pipeline_cache_object_destroy(struct vk_device *_device, struct vk_pipeline_cache_object *object) { struct radv_device *device = container_of(_device, struct radv_device, vk); - struct radv_pipeline_cache_object *pipeline_obj = - container_of(object, struct radv_pipeline_cache_object, base); + struct radv_pipeline_cache_object *pipeline_obj = container_of(object, struct radv_pipeline_cache_object, base); for (unsigned i = 0; i < pipeline_obj->num_shaders; i++) { if (pipeline_obj->shaders[i]) @@ -291,8 +278,8 @@ radv_pipeline_cache_object_destroy(struct vk_device *_device, } static struct vk_pipeline_cache_object * -radv_pipeline_cache_object_deserialize(struct vk_pipeline_cache *cache, const void *key_data, - size_t key_size, struct blob_reader *blob) +radv_pipeline_cache_object_deserialize(struct vk_pipeline_cache *cache, const void *key_data, size_t key_size, + struct blob_reader *blob) { struct radv_device *device = container_of(cache->base.device, struct radv_device, vk); assert(key_size == SHA1_DIGEST_LENGTH); @@ -302,8 +289,8 @@ radv_pipeline_cache_object_deserialize(struct vk_pipeline_cache *cache, const vo unsigned ps_epilog_binary_size = blob_read_uint32(blob); struct radv_pipeline_cache_object *object; - object = radv_pipeline_cache_object_create(&device->vk, num_shaders, key_data, num_stack_sizes, - ps_epilog_binary_size); + object = + radv_pipeline_cache_object_create(&device->vk, num_shaders, key_data, num_stack_sizes, ps_epilog_binary_size); if (!object) return NULL; @@ -331,8 +318,7 @@ radv_pipeline_cache_object_deserialize(struct vk_pipeline_cache *cache, const vo if (ps_epilog_binary_size) { assert(num_stack_sizes == 0); struct radv_shader_part_binary *binary = object->data; - object->ps_epilog = - radv_shader_part_create(device, binary, device->physical_device->ps_wave_size); + object->ps_epilog = radv_shader_part_create(device, binary, device->physical_device->ps_wave_size); if (!object->ps_epilog) { vk_pipeline_cache_object_unref(&device->vk, &object->base); @@ -346,8 +332,7 @@ 
radv_pipeline_cache_object_deserialize(struct vk_pipeline_cache *cache, const vo static bool radv_pipeline_cache_object_serialize(struct vk_pipeline_cache_object *object, struct blob *blob) { - struct radv_pipeline_cache_object *pipeline_obj = - container_of(object, struct radv_pipeline_cache_object, base); + struct radv_pipeline_cache_object *pipeline_obj = container_of(object, struct radv_pipeline_cache_object, base); blob_write_uint32(blob, pipeline_obj->num_shaders); blob_write_uint32(blob, pipeline_obj->num_stack_sizes); @@ -356,8 +341,7 @@ radv_pipeline_cache_object_serialize(struct vk_pipeline_cache_object *object, st for (unsigned i = 0; i < pipeline_obj->num_shaders; i++) blob_write_bytes(blob, pipeline_obj->shaders[i]->sha1, SHA1_DIGEST_LENGTH); - const size_t data_size = - pipeline_obj->ps_epilog_binary_size + (pipeline_obj->num_stack_sizes * sizeof(uint32_t)); + const size_t data_size = pipeline_obj->ps_epilog_binary_size + (pipeline_obj->num_stack_sizes * sizeof(uint32_t)); blob_write_bytes(blob, pipeline_obj->data, data_size); return true; @@ -370,9 +354,8 @@ const struct vk_pipeline_cache_object_ops radv_pipeline_ops = { }; bool -radv_pipeline_cache_search(struct radv_device *device, struct vk_pipeline_cache *cache, - struct radv_pipeline *pipeline, const unsigned char *sha1, - bool *found_in_application_cache) +radv_pipeline_cache_search(struct radv_device *device, struct vk_pipeline_cache *cache, struct radv_pipeline *pipeline, + const unsigned char *sha1, bool *found_in_application_cache) { *found_in_application_cache = false; @@ -391,8 +374,7 @@ radv_pipeline_cache_search(struct radv_device *device, struct vk_pipeline_cache if (!object) return false; - struct radv_pipeline_cache_object *pipeline_obj = - container_of(object, struct radv_pipeline_cache_object, base); + struct radv_pipeline_cache_object *pipeline_obj = container_of(object, struct radv_pipeline_cache_object, base); for (unsigned i = 0; i < pipeline_obj->num_shaders; i++) { gl_shader_stage s = pipeline_obj->shaders[i]->info.stage; @@ -419,10 +401,8 @@ radv_pipeline_cache_search(struct radv_device *device, struct vk_pipeline_cache } void -radv_pipeline_cache_insert(struct radv_device *device, struct vk_pipeline_cache *cache, - struct radv_pipeline *pipeline, - struct radv_shader_part_binary *ps_epilog_binary, - const unsigned char *sha1) +radv_pipeline_cache_insert(struct radv_device *device, struct vk_pipeline_cache *cache, struct radv_pipeline *pipeline, + struct radv_shader_part_binary *ps_epilog_binary, const unsigned char *sha1) { if (radv_is_cache_disabled(device)) return; @@ -439,8 +419,7 @@ radv_pipeline_cache_insert(struct radv_device *device, struct vk_pipeline_cache unsigned ps_epilog_binary_size = ps_epilog_binary ? 
ps_epilog_binary->total_size : 0; struct radv_pipeline_cache_object *pipeline_obj; - pipeline_obj = - radv_pipeline_cache_object_create(&device->vk, num_shaders, sha1, 0, ps_epilog_binary_size); + pipeline_obj = radv_pipeline_cache_object_create(&device->vk, num_shaders, sha1, 0, ps_epilog_binary_size); if (!pipeline_obj) return; @@ -468,8 +447,7 @@ radv_pipeline_cache_insert(struct radv_device *device, struct vk_pipeline_cache } /* Add the object to the cache */ - struct vk_pipeline_cache_object *object = - vk_pipeline_cache_add_object(cache, &pipeline_obj->base); + struct vk_pipeline_cache_object *object = vk_pipeline_cache_add_object(cache, &pipeline_obj->base); vk_pipeline_cache_object_unref(&device->vk, object); } @@ -485,29 +463,26 @@ radv_ray_tracing_pipeline_cache_search(struct radv_device *device, struct vk_pip cache = device->mem_cache; bool cache_hit = false; - struct vk_pipeline_cache_object *object = vk_pipeline_cache_lookup_object( - cache, pipeline->sha1, SHA1_DIGEST_LENGTH, &radv_pipeline_ops, &cache_hit); + struct vk_pipeline_cache_object *object = + vk_pipeline_cache_lookup_object(cache, pipeline->sha1, SHA1_DIGEST_LENGTH, &radv_pipeline_ops, &cache_hit); if (!object) return false; - struct radv_pipeline_cache_object *pipeline_obj = - container_of(object, struct radv_pipeline_cache_object, base); + struct radv_pipeline_cache_object *pipeline_obj = container_of(object, struct radv_pipeline_cache_object, base); bool is_library = pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR; bool complete = true; unsigned idx = 0; if (!is_library) - pipeline->base.base.shaders[MESA_SHADER_INTERSECTION] = - radv_shader_ref(pipeline_obj->shaders[idx++]); + pipeline->base.base.shaders[MESA_SHADER_INTERSECTION] = radv_shader_ref(pipeline_obj->shaders[idx++]); for (unsigned i = 0; i < pCreateInfo->stageCount; i++) { if (radv_ray_tracing_stage_is_compiled(&pipeline->stages[i])) { pipeline->stages[i].shader = &radv_shader_ref(pipeline_obj->shaders[idx++])->base; } else if (is_library) { - pipeline->stages[i].shader = - radv_pipeline_cache_search_nir(device, cache, pipeline->stages[i].sha1); + pipeline->stages[i].shader = radv_pipeline_cache_search_nir(device, cache, pipeline->stages[i].sha1); complete &= pipeline->stages[i].shader != NULL; } } @@ -532,8 +507,8 @@ radv_ray_tracing_pipeline_cache_search(struct radv_device *device, struct vk_pip void radv_ray_tracing_pipeline_cache_insert(struct radv_device *device, struct vk_pipeline_cache *cache, - struct radv_ray_tracing_pipeline *pipeline, - unsigned num_stages, const unsigned char *sha1) + struct radv_ray_tracing_pipeline *pipeline, unsigned num_stages, + const unsigned char *sha1) { if (radv_is_cache_disabled(device)) return; @@ -551,8 +526,7 @@ radv_ray_tracing_pipeline_cache_insert(struct radv_device *device, struct vk_pip unsigned idx = 0; if (pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]) - pipeline_obj->shaders[idx++] = - radv_shader_ref(pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]); + pipeline_obj->shaders[idx++] = radv_shader_ref(pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]); for (unsigned i = 0; i < num_stages; ++i) { if (radv_ray_tracing_stage_is_compiled(&pipeline->stages[i])) @@ -566,14 +540,12 @@ radv_ray_tracing_pipeline_cache_insert(struct radv_device *device, struct vk_pip stack_sizes[i] = pipeline->stages[i].stack_size; /* Add the object to the cache */ - struct vk_pipeline_cache_object *object = - vk_pipeline_cache_add_object(cache, &pipeline_obj->base); + struct vk_pipeline_cache_object 
*object = vk_pipeline_cache_add_object(cache, &pipeline_obj->base); vk_pipeline_cache_object_unref(&device->vk, object); } struct vk_pipeline_cache_object * -radv_pipeline_cache_search_nir(struct radv_device *device, struct vk_pipeline_cache *cache, - const uint8_t *sha1) +radv_pipeline_cache_search_nir(struct radv_device *device, struct vk_pipeline_cache *cache, const uint8_t *sha1) { if (radv_is_cache_disabled(device)) return NULL; @@ -581,17 +553,14 @@ radv_pipeline_cache_search_nir(struct radv_device *device, struct vk_pipeline_ca if (!cache) cache = device->mem_cache; - return vk_pipeline_cache_lookup_object(cache, sha1, SHA1_DIGEST_LENGTH, - &vk_raw_data_cache_object_ops, NULL); + return vk_pipeline_cache_lookup_object(cache, sha1, SHA1_DIGEST_LENGTH, &vk_raw_data_cache_object_ops, NULL); } struct nir_shader * -radv_pipeline_cache_handle_to_nir(struct radv_device *device, - struct vk_pipeline_cache_object *object) +radv_pipeline_cache_handle_to_nir(struct radv_device *device, struct vk_pipeline_cache_object *object) { struct blob_reader blob; - struct vk_raw_data_cache_object *nir_object = - container_of(object, struct vk_raw_data_cache_object, base); + struct vk_raw_data_cache_object *nir_object = container_of(object, struct vk_raw_data_cache_object, base); blob_reader_init(&blob, nir_object->data, nir_object->data_size); nir_shader *nir = nir_deserialize(NULL, NULL, &blob); @@ -605,8 +574,8 @@ radv_pipeline_cache_handle_to_nir(struct radv_device *device, } struct vk_pipeline_cache_object * -radv_pipeline_cache_nir_to_handle(struct radv_device *device, struct vk_pipeline_cache *cache, - struct nir_shader *nir, const uint8_t *sha1, bool cached) +radv_pipeline_cache_nir_to_handle(struct radv_device *device, struct vk_pipeline_cache *cache, struct nir_shader *nir, + const uint8_t *sha1, bool cached) { if (!cache) cache = device->mem_cache; @@ -626,8 +595,8 @@ radv_pipeline_cache_nir_to_handle(struct radv_device *device, struct vk_pipeline struct vk_pipeline_cache_object *object; if (cached && !radv_is_cache_disabled(device)) { - object = vk_pipeline_cache_create_and_insert_object(cache, sha1, SHA1_DIGEST_LENGTH, data, - size, &vk_raw_data_cache_object_ops); + object = vk_pipeline_cache_create_and_insert_object(cache, sha1, SHA1_DIGEST_LENGTH, data, size, + &vk_raw_data_cache_object_ops); } else { struct vk_raw_data_cache_object *nir_object = vk_raw_data_cache_object_create(&device->vk, sha1, SHA1_DIGEST_LENGTH, data, size); diff --git a/src/amd/vulkan/radv_pipeline_compute.c b/src/amd/vulkan/radv_pipeline_compute.c index ad58c2d..61508c1 100644 --- a/src/amd/vulkan/radv_pipeline_compute.c +++ b/src/amd/vulkan/radv_pipeline_compute.c @@ -70,8 +70,8 @@ radv_pipeline_emit_hw_cs(const struct radv_physical_device *pdevice, struct rade } void -radv_pipeline_emit_compute_state(const struct radv_physical_device *pdevice, - struct radeon_cmdbuf *cs, const struct radv_shader *shader) +radv_pipeline_emit_compute_state(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs, + const struct radv_shader *shader) { unsigned threads_per_threadgroup; unsigned threadgroups_per_cu = 1; @@ -86,9 +86,9 @@ radv_pipeline_emit_compute_state(const struct radv_physical_device *pdevice, if (pdevice->rad_info.gfx_level >= GFX10 && waves_per_threadgroup == 1) threadgroups_per_cu = 2; - radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, - ac_get_compute_resource_limits(&pdevice->rad_info, waves_per_threadgroup, - max_waves_per_sh, threadgroups_per_cu)); + radeon_set_sh_reg( + cs, 
R_00B854_COMPUTE_RESOURCE_LIMITS, + ac_get_compute_resource_limits(&pdevice->rad_info, waves_per_threadgroup, max_waves_per_sh, threadgroups_per_cu)); radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[0])); @@ -113,20 +113,17 @@ radv_compute_generate_pm4(const struct radv_device *device, struct radv_compute_ } static struct radv_pipeline_key -radv_generate_compute_pipeline_key(const struct radv_device *device, - struct radv_compute_pipeline *pipeline, +radv_generate_compute_pipeline_key(const struct radv_device *device, struct radv_compute_pipeline *pipeline, const VkComputePipelineCreateInfo *pCreateInfo) { const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->stage; - struct radv_pipeline_key key = - radv_generate_pipeline_key(device, &pipeline->base, pCreateInfo->flags); + struct radv_pipeline_key key = radv_generate_pipeline_key(device, &pipeline->base, pCreateInfo->flags); const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *subgroup_size = vk_find_struct_const(stage->pNext, PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO); if (subgroup_size) { - assert(subgroup_size->requiredSubgroupSize == 32 || - subgroup_size->requiredSubgroupSize == 64); + assert(subgroup_size->requiredSubgroupSize == 32 || subgroup_size->requiredSubgroupSize == 64); key.cs.compute_subgroup_size = subgroup_size->requiredSubgroupSize; } else if (stage->flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT) { key.cs.require_full_subgroups = true; @@ -152,12 +149,10 @@ radv_compute_pipeline_init(const struct radv_device *device, struct radv_compute } static VkResult -radv_compute_pipeline_compile(struct radv_compute_pipeline *pipeline, - struct radv_pipeline_layout *pipeline_layout, +radv_compute_pipeline_compile(struct radv_compute_pipeline *pipeline, struct radv_pipeline_layout *pipeline_layout, struct radv_device *device, struct vk_pipeline_cache *cache, const struct radv_pipeline_key *pipeline_key, - const VkPipelineShaderStageCreateInfo *pStage, - const VkPipelineCreateFlags flags, + const VkPipelineShaderStageCreateInfo *pStage, const VkPipelineCreateFlags flags, const VkPipelineCreationFeedbackCreateInfo *creation_feedback) { struct radv_shader_binary *binaries[MESA_VULKAN_SHADER_STAGES] = {NULL}; @@ -180,11 +175,10 @@ radv_compute_pipeline_compile(struct radv_compute_pipeline *pipeline, pipeline->base.pipeline_hash = *(uint64_t *)hash; bool found_in_application_cache = true; - if (!keep_executable_info && radv_pipeline_cache_search(device, cache, &pipeline->base, hash, - &found_in_application_cache)) { + if (!keep_executable_info && + radv_pipeline_cache_search(device, cache, &pipeline->base, hash, &found_in_application_cache)) { if (found_in_application_cache) - pipeline_feedback.flags |= - VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; + pipeline_feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; result = VK_SUCCESS; goto done; } @@ -195,8 +189,7 @@ radv_compute_pipeline_compile(struct radv_compute_pipeline *pipeline, int64_t stage_start = os_time_get_nano(); /* Compile SPIR-V shader to NIR. 
*/ - cs_stage.nir = - radv_shader_spirv_to_nir(device, &cs_stage, pipeline_key, pipeline->base.is_internal); + cs_stage.nir = radv_shader_spirv_to_nir(device, &cs_stage, pipeline_key, pipeline->base.is_internal); radv_optimize_nir(cs_stage.nir, pipeline_key->optimisations_disabled); @@ -207,11 +200,11 @@ radv_compute_pipeline_compile(struct radv_compute_pipeline *pipeline, /* Run the shader info pass. */ radv_nir_shader_info_init(&cs_stage.info); - radv_nir_shader_info_pass(device, cs_stage.nir, MESA_SHADER_NONE, pipeline_layout, pipeline_key, - pipeline->base.type, false, &cs_stage.info); + radv_nir_shader_info_pass(device, cs_stage.nir, MESA_SHADER_NONE, pipeline_layout, pipeline_key, pipeline->base.type, + false, &cs_stage.info); - radv_declare_shader_args(device, pipeline_key, &cs_stage.info, MESA_SHADER_COMPUTE, - MESA_SHADER_NONE, RADV_SHADER_TYPE_DEFAULT, &cs_stage.args); + radv_declare_shader_args(device, pipeline_key, &cs_stage.info, MESA_SHADER_COMPUTE, MESA_SHADER_NONE, + RADV_SHADER_TYPE_DEFAULT, &cs_stage.args); cs_stage.info.user_sgprs_locs = cs_stage.args.user_sgprs_locs; cs_stage.info.inline_push_constant_mask = cs_stage.args.ac.inline_push_const_mask; @@ -225,9 +218,9 @@ radv_compute_pipeline_compile(struct radv_compute_pipeline *pipeline, nir_print_shader(cs_stage.nir, stderr); /* Compile NIR shader to AMD assembly. */ - pipeline->base.shaders[MESA_SHADER_COMPUTE] = radv_shader_nir_to_asm( - device, cache, &cs_stage, &cs_stage.nir, 1, pipeline_key, keep_executable_info, - keep_statistic_info, &binaries[MESA_SHADER_COMPUTE]); + pipeline->base.shaders[MESA_SHADER_COMPUTE] = + radv_shader_nir_to_asm(device, cache, &cs_stage, &cs_stage.nir, 1, pipeline_key, keep_executable_info, + keep_statistic_info, &binaries[MESA_SHADER_COMPUTE]); cs_stage.feedback.duration += os_time_get_nano() - stage_start; @@ -247,8 +240,8 @@ radv_compute_pipeline_compile(struct radv_compute_pipeline *pipeline, free(binaries[MESA_SHADER_COMPUTE]); if (radv_can_dump_shader_stats(device, cs_stage.nir)) { - radv_dump_shader_stats(device, &pipeline->base, pipeline->base.shaders[MESA_SHADER_COMPUTE], - MESA_SHADER_COMPUTE, stderr); + radv_dump_shader_stats(device, &pipeline->base, pipeline->base.shaders[MESA_SHADER_COMPUTE], MESA_SHADER_COMPUTE, + stderr); } ralloc_free(cs_stage.nir); @@ -268,8 +261,7 @@ done: } VkResult -radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, - const VkComputePipelineCreateInfo *pCreateInfo, +radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkComputePipelineCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline) { RADV_FROM_HANDLE(radv_device, device, _device); @@ -278,8 +270,7 @@ radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, struct radv_compute_pipeline *pipeline; VkResult result; - pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pipeline == NULL) { return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); } @@ -292,9 +283,8 @@ radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, struct radv_pipeline_key key = radv_generate_compute_pipeline_key(device, pipeline, pCreateInfo); - result = - radv_compute_pipeline_compile(pipeline, pipeline_layout, device, cache, &key, - &pCreateInfo->stage, pCreateInfo->flags, creation_feedback); + result = radv_compute_pipeline_compile(pipeline, 
pipeline_layout, device, cache, &key, &pCreateInfo->stage, + pCreateInfo->flags, creation_feedback); if (result != VK_SUCCESS) { radv_pipeline_destroy(device, &pipeline->base, pAllocator); return result; @@ -303,23 +293,21 @@ radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, radv_compute_pipeline_init(device, pipeline, pipeline_layout); *pPipeline = radv_pipeline_to_handle(&pipeline->base); - radv_rmv_log_compute_pipeline_create(device, pCreateInfo->flags, &pipeline->base, - pipeline->base.is_internal); + radv_rmv_log_compute_pipeline_create(device, pCreateInfo->flags, &pipeline->base, pipeline->base.is_internal); return VK_SUCCESS; } static VkResult radv_create_compute_pipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count, - const VkComputePipelineCreateInfo *pCreateInfos, - const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines) + const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipelines) { VkResult result = VK_SUCCESS; unsigned i = 0; for (; i < count; i++) { VkResult r; - r = radv_compute_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, - &pPipelines[i]); + r = radv_compute_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, &pPipelines[i]); if (r != VK_SUCCESS) { result = r; pPipelines[i] = VK_NULL_HANDLE; @@ -344,9 +332,8 @@ radv_destroy_compute_pipeline(struct radv_device *device, struct radv_compute_pi VKAPI_ATTR VkResult VKAPI_CALL radv_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count, - const VkComputePipelineCreateInfo *pCreateInfos, - const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines) + const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipelines) { - return radv_create_compute_pipelines(_device, pipelineCache, count, pCreateInfos, pAllocator, - pPipelines); + return radv_create_compute_pipelines(_device, pipelineCache, count, pCreateInfos, pAllocator, pPipelines); } diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index e7b16fc..a8790a9 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -71,8 +71,7 @@ radv_is_static_vrs_enabled(const struct radv_graphics_pipeline *pipeline, } static bool -radv_is_vrs_enabled(const struct radv_graphics_pipeline *pipeline, - const struct vk_graphics_pipeline_state *state) +radv_is_vrs_enabled(const struct radv_graphics_pipeline *pipeline, const struct vk_graphics_pipeline_state *state) { return radv_is_static_vrs_enabled(pipeline, state) || (pipeline->dynamic_states & RADV_DYNAMIC_FRAGMENT_SHADING_RATE); @@ -81,8 +80,7 @@ radv_is_vrs_enabled(const struct radv_graphics_pipeline *pipeline, static bool radv_pipeline_has_ds_attachments(const struct vk_render_pass_state *rp) { - return rp->depth_attachment_format != VK_FORMAT_UNDEFINED || - rp->stencil_attachment_format != VK_FORMAT_UNDEFINED; + return rp->depth_attachment_format != VK_FORMAT_UNDEFINED || rp->stencil_attachment_format != VK_FORMAT_UNDEFINED; } static bool @@ -125,8 +123,8 @@ radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline) * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) */ void -si_blend_remove_dst(VkBlendOp *func, VkBlendFactor *src_factor, VkBlendFactor *dst_factor, - VkBlendFactor expected_dst, VkBlendFactor replacement_src) +si_blend_remove_dst(VkBlendOp *func, VkBlendFactor *src_factor, VkBlendFactor 
*dst_factor, VkBlendFactor expected_dst, + VkBlendFactor replacement_src) { if (*src_factor == expected_dst && *dst_factor == VK_BLEND_FACTOR_ZERO) { *src_factor = VK_BLEND_FACTOR_ZERO; @@ -141,8 +139,8 @@ si_blend_remove_dst(VkBlendOp *func, VkBlendFactor *src_factor, VkBlendFactor *d } static unsigned -radv_choose_spi_color_format(const struct radv_device *device, VkFormat vk_format, - bool blend_enable, bool blend_need_alpha) +radv_choose_spi_color_format(const struct radv_device *device, VkFormat vk_format, bool blend_enable, + bool blend_need_alpha) { const struct util_format_description *desc = vk_format_description(vk_format); bool use_rbplus = device->physical_device->rad_info.rbplus_allowed; @@ -194,13 +192,11 @@ format_is_float32(VkFormat format) const struct util_format_description *desc = vk_format_description(format); int channel = vk_format_get_first_non_void_channel(format); - return channel >= 0 && desc->channel[channel].type == UTIL_FORMAT_TYPE_FLOAT && - desc->channel[channel].size == 32; + return channel >= 0 && desc->channel[channel].type == UTIL_FORMAT_TYPE_FLOAT && desc->channel[channel].size == 32; } static unsigned -radv_compact_spi_shader_col_format(const struct radv_shader *ps, - const struct radv_blend_state *blend) +radv_compact_spi_shader_col_format(const struct radv_shader *ps, const struct radv_blend_state *blend) { unsigned spi_shader_col_format = blend->spi_shader_col_format; unsigned value = 0, num_mrts = 0; @@ -232,18 +228,10 @@ radv_compact_spi_shader_col_format(const struct radv_shader *ps, * radv_format_meta_fs_key(radv_fs_key_format_exemplars[i]) == i. */ const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS] = { - VK_FORMAT_R32_SFLOAT, - VK_FORMAT_R32G32_SFLOAT, - VK_FORMAT_R8G8B8A8_UNORM, - VK_FORMAT_R16G16B16A16_UNORM, - VK_FORMAT_R16G16B16A16_SNORM, - VK_FORMAT_R16G16B16A16_UINT, - VK_FORMAT_R16G16B16A16_SINT, - VK_FORMAT_R32G32B32A32_SFLOAT, - VK_FORMAT_R8G8B8A8_UINT, - VK_FORMAT_R8G8B8A8_SINT, - VK_FORMAT_A2R10G10B10_UINT_PACK32, - VK_FORMAT_A2R10G10B10_SINT_PACK32, + VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM, + VK_FORMAT_R16G16B16A16_UNORM, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R16G16B16A16_UINT, + VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R8G8B8A8_UINT, + VK_FORMAT_R8G8B8A8_SINT, VK_FORMAT_A2R10G10B10_UINT_PACK32, VK_FORMAT_A2R10G10B10_SINT_PACK32, }; unsigned @@ -295,14 +283,12 @@ static bool radv_pipeline_needs_dynamic_ps_epilog(const struct radv_graphics_pipeline *pipeline) { /* These dynamic states need to compile PS epilogs on-demand. 
*/ - return !!(pipeline->dynamic_states & - (RADV_DYNAMIC_COLOR_BLEND_ENABLE | RADV_DYNAMIC_COLOR_WRITE_MASK | - RADV_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE | RADV_DYNAMIC_COLOR_BLEND_EQUATION)); + return !!(pipeline->dynamic_states & (RADV_DYNAMIC_COLOR_BLEND_ENABLE | RADV_DYNAMIC_COLOR_WRITE_MASK | + RADV_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE | RADV_DYNAMIC_COLOR_BLEND_EQUATION)); } static struct radv_blend_state -radv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline, - const struct vk_graphics_pipeline_state *state) +radv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline, const struct vk_graphics_pipeline_state *state) { const struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; struct radv_blend_state blend = {0}; @@ -338,16 +324,13 @@ radv_pipeline_uses_vrs_attachment(const VkGraphicsPipelineCreateInfo *pCreateInf return !!subpass->fragment_shading_rate_attachment; } - return (pCreateInfo->flags & - VK_PIPELINE_CREATE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR) != 0; + return (pCreateInfo->flags & VK_PIPELINE_CREATE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR) != 0; } static void -radv_pipeline_init_multisample_state(const struct radv_device *device, - struct radv_graphics_pipeline *pipeline, +radv_pipeline_init_multisample_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct vk_graphics_pipeline_state *state, - unsigned rast_prim) + const struct vk_graphics_pipeline_state *state, unsigned rast_prim) { struct radv_multisample_state *ms = &pipeline->ms; @@ -521,14 +504,13 @@ radv_dynamic_state_mask(VkDynamicState state) } } -#define RADV_DYNAMIC_CB_STATES \ - (RADV_DYNAMIC_LOGIC_OP_ENABLE | RADV_DYNAMIC_LOGIC_OP | RADV_DYNAMIC_COLOR_WRITE_ENABLE | \ - RADV_DYNAMIC_COLOR_WRITE_MASK | RADV_DYNAMIC_COLOR_BLEND_ENABLE | \ - RADV_DYNAMIC_COLOR_BLEND_EQUATION | RADV_DYNAMIC_BLEND_CONSTANTS) +#define RADV_DYNAMIC_CB_STATES \ + (RADV_DYNAMIC_LOGIC_OP_ENABLE | RADV_DYNAMIC_LOGIC_OP | RADV_DYNAMIC_COLOR_WRITE_ENABLE | \ + RADV_DYNAMIC_COLOR_WRITE_MASK | RADV_DYNAMIC_COLOR_BLEND_ENABLE | RADV_DYNAMIC_COLOR_BLEND_EQUATION | \ + RADV_DYNAMIC_BLEND_CONSTANTS) static bool -radv_pipeline_is_blend_enabled(const struct radv_graphics_pipeline *pipeline, - const struct vk_color_blend_state *cb) +radv_pipeline_is_blend_enabled(const struct radv_graphics_pipeline *pipeline, const struct vk_color_blend_state *cb) { if (cb) { for (uint32_t i = 0; i < cb->attachment_count; i++) { @@ -545,13 +527,12 @@ radv_pipeline_is_blend_enabled(const struct radv_graphics_pipeline *pipeline, } static uint64_t -radv_pipeline_needed_dynamic_state(const struct radv_device *device, - const struct radv_graphics_pipeline *pipeline, +radv_pipeline_needed_dynamic_state(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, const struct vk_graphics_pipeline_state *state) { bool has_color_att = radv_pipeline_has_color_attachments(state->rp); - bool raster_enabled = !state->rs->rasterizer_discard_enable || - (pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE); + bool raster_enabled = + !state->rs->rasterizer_discard_enable || (pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE); uint64_t states = RADV_DYNAMIC_ALL; if (device->physical_device->rad_info.gfx_level < GFX10_3) @@ -578,8 +559,7 @@ radv_pipeline_needed_dynamic_state(const struct radv_device *device, return states; } - if (!state->rs->depth_bias.enable && - 
!(pipeline->dynamic_states & RADV_DYNAMIC_DEPTH_BIAS_ENABLE)) + if (!state->rs->depth_bias.enable && !(pipeline->dynamic_states & RADV_DYNAMIC_DEPTH_BIAS_ENABLE)) states &= ~RADV_DYNAMIC_DEPTH_BIAS; if (!(pipeline->dynamic_states & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) && @@ -588,19 +568,17 @@ radv_pipeline_needed_dynamic_state(const struct radv_device *device, if (!(pipeline->dynamic_states & RADV_DYNAMIC_STENCIL_TEST_ENABLE) && (!state->ds || !state->ds->stencil.test_enable)) - states &= ~(RADV_DYNAMIC_STENCIL_COMPARE_MASK | RADV_DYNAMIC_STENCIL_WRITE_MASK | - RADV_DYNAMIC_STENCIL_REFERENCE | RADV_DYNAMIC_STENCIL_OP); + states &= ~(RADV_DYNAMIC_STENCIL_COMPARE_MASK | RADV_DYNAMIC_STENCIL_WRITE_MASK | RADV_DYNAMIC_STENCIL_REFERENCE | + RADV_DYNAMIC_STENCIL_OP); - if (!(pipeline->dynamic_states & RADV_DYNAMIC_DISCARD_RECTANGLE_ENABLE) && - !state->dr->rectangle_count) + if (!(pipeline->dynamic_states & RADV_DYNAMIC_DISCARD_RECTANGLE_ENABLE) && !state->dr->rectangle_count) states &= ~RADV_DYNAMIC_DISCARD_RECTANGLE; if (!(pipeline->dynamic_states & RADV_DYNAMIC_SAMPLE_LOCATIONS_ENABLE) && (!state->ms || !state->ms->sample_locations_enable)) states &= ~RADV_DYNAMIC_SAMPLE_LOCATIONS; - if (!(pipeline->dynamic_states & RADV_DYNAMIC_LINE_STIPPLE_ENABLE) && - !state->rs->line.stipple.enable) + if (!(pipeline->dynamic_states & RADV_DYNAMIC_LINE_STIPPLE_ENABLE) && !state->rs->line.stipple.enable) states &= ~RADV_DYNAMIC_LINE_STIPPLE; if (!has_color_att || !radv_pipeline_is_blend_enabled(pipeline, state->cb)) @@ -616,8 +594,7 @@ radv_pipeline_needed_dynamic_state(const struct radv_device *device, } static struct radv_ia_multi_vgt_param_helpers -radv_compute_ia_multi_vgt_param_helpers(const struct radv_device *device, - struct radv_graphics_pipeline *pipeline) +radv_compute_ia_multi_vgt_param_helpers(const struct radv_device *device, struct radv_graphics_pipeline *pipeline) { const struct radv_physical_device *pdevice = device->physical_device; struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0}; @@ -665,8 +642,7 @@ radv_compute_ia_multi_vgt_param_helpers(const struct radv_device *device, * Reproducer: https://bugs.freedesktop.org/show_bug.cgi?id=109242 */ if (pdevice->rad_info.family == CHIP_TONGA || pdevice->rad_info.family == CHIP_FIJI || - pdevice->rad_info.family == CHIP_POLARIS10 || - pdevice->rad_info.family == CHIP_POLARIS11 || + pdevice->rad_info.family == CHIP_POLARIS10 || pdevice->rad_info.family == CHIP_POLARIS11 || pdevice->rad_info.family == CHIP_POLARIS12 || pdevice->rad_info.family == CHIP_VEGAM) { ia_multi_vgt_param.partial_vs_wave = true; } @@ -694,10 +670,10 @@ radv_get_attrib_stride(const VkPipelineVertexInputStateCreateInfo *vi, uint32_t return 0; } -#define ALL_GRAPHICS_LIB_FLAGS \ - (VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT | \ - VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT | \ - VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT | \ +#define ALL_GRAPHICS_LIB_FLAGS \ + (VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT | \ + VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT | \ + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT | \ VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) static VkGraphicsPipelineLibraryFlagBitsEXT @@ -720,10 +696,8 @@ shader_stage_to_pipeline_library_flags(VkShaderStageFlagBits stage) } static VkResult -radv_pipeline_import_graphics_info(struct radv_device *device, - struct radv_graphics_pipeline *pipeline, - struct vk_graphics_pipeline_state *state, - struct 
radv_pipeline_layout *layout, +radv_pipeline_import_graphics_info(struct radv_device *device, struct radv_graphics_pipeline *pipeline, + struct vk_graphics_pipeline_state *state, struct radv_pipeline_layout *layout, const VkGraphicsPipelineCreateInfo *pCreateInfo, VkGraphicsPipelineLibraryFlagBitsEXT lib_flags) { @@ -734,8 +708,7 @@ radv_pipeline_import_graphics_info(struct radv_device *device, if (pCreateInfo->pDynamicState) { uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount; for (uint32_t s = 0; s < count; s++) { - pipeline->dynamic_states |= - radv_dynamic_state_mask(pCreateInfo->pDynamicState->pDynamicStates[s]); + pipeline->dynamic_states |= radv_dynamic_state_mask(pCreateInfo->pDynamicState->pDynamicStates[s]); } } @@ -750,17 +723,15 @@ radv_pipeline_import_graphics_info(struct radv_device *device, pipeline->active_stages |= sinfo->stage; } - result = - vk_graphics_pipeline_state_fill(&device->vk, state, pCreateInfo, NULL, NULL, NULL, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT, &pipeline->state_data); + result = vk_graphics_pipeline_state_fill(&device->vk, state, pCreateInfo, NULL, NULL, NULL, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT, &pipeline->state_data); if (result != VK_SUCCESS) return result; if (pipeline->active_stages & VK_SHADER_STAGE_MESH_BIT_EXT) { pipeline->last_vgt_api_stage = MESA_SHADER_MESH; } else { - pipeline->last_vgt_api_stage = - util_last_bit(pipeline->active_stages & BITFIELD_MASK(MESA_SHADER_FRAGMENT)) - 1; + pipeline->last_vgt_api_stage = util_last_bit(pipeline->active_stages & BITFIELD_MASK(MESA_SHADER_FRAGMENT)) - 1; } if (lib_flags == ALL_GRAPHICS_LIB_FLAGS) { @@ -808,10 +779,8 @@ radv_pipeline_import_graphics_info(struct radv_device *device, } static void -radv_graphics_pipeline_import_lib(const struct radv_device *device, - struct radv_graphics_pipeline *pipeline, - struct vk_graphics_pipeline_state *state, - struct radv_pipeline_layout *layout, +radv_graphics_pipeline_import_lib(const struct radv_device *device, struct radv_graphics_pipeline *pipeline, + struct vk_graphics_pipeline_state *state, struct radv_pipeline_layout *layout, struct radv_graphics_lib_pipeline *lib, bool link_optimize) { bool import_binaries = false; @@ -867,8 +836,7 @@ radv_graphics_pipeline_import_lib(const struct radv_device *device, } static void -radv_pipeline_init_input_assembly_state(const struct radv_device *device, - struct radv_graphics_pipeline *pipeline) +radv_pipeline_init_input_assembly_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline) { pipeline->ia_multi_vgt_param = radv_compute_ia_multi_vgt_param_helpers(device, pipeline); } @@ -891,13 +859,11 @@ radv_pipeline_uses_ds_feedback_loop(const VkGraphicsPipelineCreateInfo *pCreateI } } - return (pCreateInfo->flags & - VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) != 0; + return (pCreateInfo->flags & VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) != 0; } static void -radv_pipeline_init_dynamic_state(const struct radv_device *device, - struct radv_graphics_pipeline *pipeline, +radv_pipeline_init_dynamic_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline, const struct vk_graphics_pipeline_state *state, const VkGraphicsPipelineCreateInfo *pCreateInfo) { @@ -1051,15 +1017,13 @@ radv_pipeline_init_dynamic_state(const struct radv_device *device, } if (states & RADV_DYNAMIC_SAMPLE_LOCATIONS) { - unsigned count = state->ms->sample_locations->per_pixel * - state->ms->sample_locations->grid_size.width * + unsigned count = 
state->ms->sample_locations->per_pixel * state->ms->sample_locations->grid_size.width * state->ms->sample_locations->grid_size.height; dynamic->sample_location.per_pixel = state->ms->sample_locations->per_pixel; dynamic->sample_location.grid_size = state->ms->sample_locations->grid_size; dynamic->sample_location.count = count; - typed_memcpy(&dynamic->sample_location.locations[0], state->ms->sample_locations->locations, - count); + typed_memcpy(&dynamic->sample_location.locations[0], state->ms->sample_locations->locations, count); } /* Depth stencil. */ @@ -1191,16 +1155,14 @@ radv_pipeline_init_dynamic_state(const struct radv_device *device, bool uses_ds_feedback_loop = radv_pipeline_uses_ds_feedback_loop(pCreateInfo, state); dynamic->feedback_loop_aspects = - uses_ds_feedback_loop ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) - : VK_IMAGE_ASPECT_NONE; + uses_ds_feedback_loop ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) : VK_IMAGE_ASPECT_NONE; } pipeline->dynamic_state.mask = states; } static uint32_t -radv_compute_db_shader_control(const struct radv_device *device, - const struct radv_graphics_pipeline *pipeline, +radv_compute_db_shader_control(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, const struct vk_graphics_pipeline_state *state) { const struct radv_physical_device *pdevice = device->physical_device; @@ -1227,25 +1189,21 @@ radv_compute_db_shader_control(const struct radv_device *device, return S_02880C_Z_EXPORT_ENABLE(ps && ps->info.ps.writes_z) | S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps && ps->info.ps.writes_stencil) | - S_02880C_KILL_ENABLE(ps && ps->info.ps.can_discard) | - S_02880C_MASK_EXPORT_ENABLE(mask_export_enable) | + S_02880C_KILL_ENABLE(ps && ps->info.ps.can_discard) | S_02880C_MASK_EXPORT_ENABLE(mask_export_enable) | S_02880C_CONSERVATIVE_Z_EXPORT(conservative_z_export) | S_02880C_DEPTH_BEFORE_SHADER(ps && ps->info.ps.early_fragment_test) | S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(ps && ps->info.ps.post_depth_coverage) | S_02880C_EXEC_ON_HIER_FAIL(ps && ps->info.ps.writes_memory) | - S_02880C_EXEC_ON_NOOP(ps && ps->info.ps.writes_memory) | - S_02880C_DUAL_QUAD_DISABLE(disable_rbplus) | + S_02880C_EXEC_ON_NOOP(ps && ps->info.ps.writes_memory) | S_02880C_DUAL_QUAD_DISABLE(disable_rbplus) | S_02880C_OVERRIDE_INTRINSIC_RATE_ENABLE(export_conflict_wa) | S_02880C_OVERRIDE_INTRINSIC_RATE(export_conflict_wa ? 2 : 0); } static void -gfx10_emit_ge_pc_alloc(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, - uint32_t oversub_pc_lines) +gfx10_emit_ge_pc_alloc(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, uint32_t oversub_pc_lines) { - radeon_set_uconfig_reg( - cs, R_030980_GE_PC_ALLOC, - S_030980_OVERSUB_EN(oversub_pc_lines > 0) | S_030980_NUM_PC_LINES(oversub_pc_lines - 1)); + radeon_set_uconfig_reg(cs, R_030980_GE_PC_ALLOC, + S_030980_OVERSUB_EN(oversub_pc_lines > 0) | S_030980_NUM_PC_LINES(oversub_pc_lines - 1)); } struct radv_shader * @@ -1292,8 +1250,7 @@ get_vs_output_info(const struct radv_graphics_pipeline *pipeline) } static bool -radv_should_export_multiview(const struct radv_pipeline_stage *producer, - const struct radv_pipeline_stage *consumer, +radv_should_export_multiview(const struct radv_pipeline_stage *producer, const struct radv_pipeline_stage *consumer, const struct radv_pipeline_key *pipeline_key) { /* Export the layer in the last VGT stage if multiview is used. 
When the next stage is unknown @@ -1306,11 +1263,9 @@ radv_should_export_multiview(const struct radv_pipeline_stage *producer, } static void -radv_remove_point_size(const struct radv_pipeline_key *pipeline_key, nir_shader *producer, - nir_shader *consumer) +radv_remove_point_size(const struct radv_pipeline_key *pipeline_key, nir_shader *producer, nir_shader *consumer) { - if ((consumer->info.inputs_read & VARYING_BIT_PSIZ) || - !(producer->info.outputs_written & VARYING_BIT_PSIZ)) + if ((consumer->info.inputs_read & VARYING_BIT_PSIZ) || !(producer->info.outputs_written & VARYING_BIT_PSIZ)) return; /* Do not remove PSIZ if the shader uses XFB because it might be stored. */ @@ -1320,14 +1275,11 @@ radv_remove_point_size(const struct radv_pipeline_key *pipeline_key, nir_shader /* Do not remove PSIZ if the rasterization primitive uses points. */ if (consumer->info.stage == MESA_SHADER_FRAGMENT && ((producer->info.stage == MESA_SHADER_TESS_EVAL && producer->info.tess.point_mode) || - (producer->info.stage == MESA_SHADER_GEOMETRY && - producer->info.gs.output_primitive == MESA_PRIM_POINTS) || - (producer->info.stage == MESA_SHADER_MESH && - producer->info.mesh.primitive_type == MESA_PRIM_POINTS))) + (producer->info.stage == MESA_SHADER_GEOMETRY && producer->info.gs.output_primitive == MESA_PRIM_POINTS) || + (producer->info.stage == MESA_SHADER_MESH && producer->info.mesh.primitive_type == MESA_PRIM_POINTS))) return; - nir_variable *var = - nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ); + nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ); assert(var); /* Change PSIZ to a global variable which allows it to be DCE'd. */ @@ -1353,8 +1305,7 @@ radv_remove_color_exports(const struct radv_pipeline_key *pipeline_key, nir_shad if (pipeline_key->dynamic_color_write_mask) return; - nir_foreach_shader_out_variable(var, nir) - { + nir_foreach_shader_out_variable (var, nir) { int idx = var->data.location; idx -= FRAG_RESULT_DATA0; @@ -1403,8 +1354,7 @@ merge_tess_info(struct shader_info *tes_info, struct shader_info *tcs_info) tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out); tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out; - assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED || - tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED || + assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED || tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED || tcs_info->tess.spacing == tes_info->tess.spacing); tes_info->tess.spacing |= tcs_info->tess.spacing; @@ -1424,8 +1374,8 @@ merge_tess_info(struct shader_info *tes_info, struct shader_info *tcs_info) } static void -radv_pipeline_link_shaders(const struct radv_device *device, nir_shader *producer, - nir_shader *consumer, const struct radv_pipeline_key *pipeline_key) +radv_pipeline_link_shaders(const struct radv_device *device, nir_shader *producer, nir_shader *consumer, + const struct radv_pipeline_key *pipeline_key) { const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level; bool progress; @@ -1444,8 +1394,7 @@ radv_pipeline_link_shaders(const struct radv_device *device, nir_shader *produce if (pipeline_key->optimisations_disabled) return; - if (consumer->info.stage == MESA_SHADER_FRAGMENT && - producer->info.has_transform_feedback_varyings) { + if (consumer->info.stage == MESA_SHADER_FRAGMENT && producer->info.has_transform_feedback_varyings) { nir_link_xfb_varyings(producer, consumer); } @@ -1508,8 +1457,7 @@ 
radv_pipeline_link_shaders(const struct radv_device *device, nir_shader *produce NIR_PASS(_, producer, nir_opt_combine_stores, nir_var_shader_out); } - if (consumer->info.stage == MESA_SHADER_GEOMETRY || - consumer->info.stage == MESA_SHADER_TESS_CTRL || + if (consumer->info.stage == MESA_SHADER_GEOMETRY || consumer->info.stage == MESA_SHADER_TESS_CTRL || consumer->info.stage == MESA_SHADER_TESS_EVAL) { NIR_PASS(_, consumer, nir_lower_io_to_vector, nir_var_shader_in); } @@ -1542,8 +1490,7 @@ static const gl_shader_stage graphics_shader_order[] = { static void radv_pipeline_link_vs(const struct radv_device *device, struct radv_pipeline_stage *vs_stage, - struct radv_pipeline_stage *next_stage, - const struct radv_pipeline_key *pipeline_key) + struct radv_pipeline_stage *next_stage, const struct radv_pipeline_key *pipeline_key) { assert(vs_stage->nir->info.stage == MESA_SHADER_VERTEX); @@ -1564,20 +1511,17 @@ radv_pipeline_link_vs(const struct radv_device *device, struct radv_pipeline_sta } if (next_stage && next_stage->nir->info.stage == MESA_SHADER_TESS_CTRL) { - nir_linked_io_var_info vs2tcs = - nir_assign_linked_io_var_locations(vs_stage->nir, next_stage->nir); + nir_linked_io_var_info vs2tcs = nir_assign_linked_io_var_locations(vs_stage->nir, next_stage->nir); vs_stage->info.vs.num_linked_outputs = vs2tcs.num_linked_io_vars; next_stage->info.tcs.num_linked_inputs = vs2tcs.num_linked_io_vars; } else if (next_stage && next_stage->nir->info.stage == MESA_SHADER_GEOMETRY) { - nir_linked_io_var_info vs2gs = - nir_assign_linked_io_var_locations(vs_stage->nir, next_stage->nir); + nir_linked_io_var_info vs2gs = nir_assign_linked_io_var_locations(vs_stage->nir, next_stage->nir); vs_stage->info.vs.num_linked_outputs = vs2gs.num_linked_io_vars; next_stage->info.gs.num_linked_inputs = vs2gs.num_linked_io_vars; } else { - nir_foreach_shader_out_variable(var, vs_stage->nir) - { + nir_foreach_shader_out_variable (var, vs_stage->nir) { var->data.driver_location = var->data.location; } } @@ -1585,8 +1529,7 @@ radv_pipeline_link_vs(const struct radv_device *device, struct radv_pipeline_sta static void radv_pipeline_link_tcs(const struct radv_device *device, struct radv_pipeline_stage *tcs_stage, - struct radv_pipeline_stage *tes_stage, - const struct radv_pipeline_key *pipeline_key) + struct radv_pipeline_stage *tes_stage, const struct radv_pipeline_key *pipeline_key) { assert(tcs_stage->nir->info.stage == MESA_SHADER_TESS_CTRL); assert(tes_stage->nir->info.stage == MESA_SHADER_TESS_EVAL); @@ -1598,8 +1541,7 @@ radv_pipeline_link_tcs(const struct radv_device *device, struct radv_pipeline_st /* Copy TCS info into the TES info */ merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info); - nir_linked_io_var_info tcs2tes = - nir_assign_linked_io_var_locations(tcs_stage->nir, tes_stage->nir); + nir_linked_io_var_info tcs2tes = nir_assign_linked_io_var_locations(tcs_stage->nir, tes_stage->nir); tcs_stage->info.tcs.num_linked_outputs = tcs2tes.num_linked_io_vars; tcs_stage->info.tcs.num_linked_patch_outputs = tcs2tes.num_linked_patch_io_vars; @@ -1609,8 +1551,7 @@ radv_pipeline_link_tcs(const struct radv_device *device, struct radv_pipeline_st static void radv_pipeline_link_tes(const struct radv_device *device, struct radv_pipeline_stage *tes_stage, - struct radv_pipeline_stage *next_stage, - const struct radv_pipeline_key *pipeline_key) + struct radv_pipeline_stage *next_stage, const struct radv_pipeline_key *pipeline_key) { assert(tes_stage->nir->info.stage == MESA_SHADER_TESS_EVAL); @@ -1626,14 +1567,12 
@@ radv_pipeline_link_tes(const struct radv_device *device, struct radv_pipeline_st } if (next_stage && next_stage->nir->info.stage == MESA_SHADER_GEOMETRY) { - nir_linked_io_var_info tes2gs = - nir_assign_linked_io_var_locations(tes_stage->nir, next_stage->nir); + nir_linked_io_var_info tes2gs = nir_assign_linked_io_var_locations(tes_stage->nir, next_stage->nir); tes_stage->info.tes.num_linked_outputs = tes2gs.num_linked_io_vars; next_stage->info.gs.num_linked_inputs = tes2gs.num_linked_io_vars; } else { - nir_foreach_shader_out_variable(var, tes_stage->nir) - { + nir_foreach_shader_out_variable (var, tes_stage->nir) { var->data.driver_location = var->data.location; } } @@ -1641,8 +1580,7 @@ radv_pipeline_link_tes(const struct radv_device *device, struct radv_pipeline_st static void radv_pipeline_link_gs(const struct radv_device *device, struct radv_pipeline_stage *gs_stage, - struct radv_pipeline_stage *fs_stage, - const struct radv_pipeline_key *pipeline_key) + struct radv_pipeline_stage *fs_stage, const struct radv_pipeline_key *pipeline_key) { assert(gs_stage->nir->info.stage == MESA_SHADER_GEOMETRY); @@ -1656,16 +1594,14 @@ radv_pipeline_link_gs(const struct radv_device *device, struct radv_pipeline_sta radv_pipeline_link_shaders(device, gs_stage->nir, fs_stage->nir, pipeline_key); } - nir_foreach_shader_out_variable(var, gs_stage->nir) - { + nir_foreach_shader_out_variable (var, gs_stage->nir) { var->data.driver_location = var->data.location; } } static void radv_pipeline_link_task(const struct radv_device *device, struct radv_pipeline_stage *task_stage, - struct radv_pipeline_stage *mesh_stage, - const struct radv_pipeline_key *pipeline_key) + struct radv_pipeline_stage *mesh_stage, const struct radv_pipeline_key *pipeline_key) { assert(task_stage->nir->info.stage == MESA_SHADER_TASK); assert(mesh_stage->nir->info.stage == MESA_SHADER_MESH); @@ -1676,8 +1612,7 @@ radv_pipeline_link_task(const struct radv_device *device, struct radv_pipeline_s static void radv_pipeline_link_mesh(const struct radv_device *device, struct radv_pipeline_stage *mesh_stage, - struct radv_pipeline_stage *fs_stage, - const struct radv_pipeline_key *pipeline_key) + struct radv_pipeline_stage *fs_stage, const struct radv_pipeline_key *pipeline_key) { assert(mesh_stage->nir->info.stage == MESA_SHADER_MESH); @@ -1686,8 +1621,7 @@ radv_pipeline_link_mesh(const struct radv_device *device, struct radv_pipeline_s nir_foreach_shader_in_variable (var, fs_stage->nir) { /* These variables are per-primitive when used with a mesh shader. */ - if (var->data.location == VARYING_SLOT_PRIMITIVE_ID || - var->data.location == VARYING_SLOT_VIEWPORT || + if (var->data.location == VARYING_SLOT_PRIMITIVE_ID || var->data.location == VARYING_SLOT_VIEWPORT || var->data.location == VARYING_SLOT_LAYER) { var->data.per_primitive = true; } @@ -1699,29 +1633,25 @@ radv_pipeline_link_mesh(const struct radv_device *device, struct radv_pipeline_s /* ac_nir_lower_ngg ignores driver locations for mesh shaders, but set them to all zero just to * be on the safe side. 
*/ - nir_foreach_shader_out_variable(var, mesh_stage->nir) - { + nir_foreach_shader_out_variable (var, mesh_stage->nir) { var->data.driver_location = 0; } } static void -radv_pipeline_link_fs(struct radv_pipeline_stage *fs_stage, - const struct radv_pipeline_key *pipeline_key) +radv_pipeline_link_fs(struct radv_pipeline_stage *fs_stage, const struct radv_pipeline_key *pipeline_key) { assert(fs_stage->nir->info.stage == MESA_SHADER_FRAGMENT); radv_remove_color_exports(pipeline_key, fs_stage->nir); - nir_foreach_shader_out_variable(var, fs_stage->nir) - { + nir_foreach_shader_out_variable (var, fs_stage->nir) { var->data.driver_location = var->data.location + var->data.index; } } static bool -radv_pipeline_needs_noop_fs(struct radv_graphics_pipeline *pipeline, - const struct radv_pipeline_key *pipeline_key) +radv_pipeline_needs_noop_fs(struct radv_graphics_pipeline *pipeline, const struct radv_pipeline_key *pipeline_key) { if (pipeline->base.type == RADV_PIPELINE_GRAPHICS && !(radv_pipeline_to_graphics(&pipeline->base)->active_stages & VK_SHADER_STAGE_FRAGMENT_BIT)) @@ -1729,8 +1659,7 @@ radv_pipeline_needs_noop_fs(struct radv_graphics_pipeline *pipeline, if (pipeline->base.type == RADV_PIPELINE_GRAPHICS_LIB && (pipeline_key->lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) && - !(radv_pipeline_to_graphics_lib(&pipeline->base)->base.active_stages & - VK_SHADER_STAGE_FRAGMENT_BIT)) + !(radv_pipeline_to_graphics_lib(&pipeline->base)->base.active_stages & VK_SHADER_STAGE_FRAGMENT_BIT)) return true; return false; @@ -1741,7 +1670,7 @@ radv_remove_varyings(nir_shader *nir) { bool fixup_derefs = false; - nir_foreach_shader_out_variable(var, nir) { + nir_foreach_shader_out_variable (var, nir) { if (var->data.always_active_io) continue; @@ -1762,10 +1691,8 @@ radv_remove_varyings(nir_shader *nir) } static void -radv_graphics_pipeline_link(const struct radv_device *device, - struct radv_graphics_pipeline *pipeline, - const struct radv_pipeline_key *pipeline_key, - struct radv_pipeline_stage *stages) +radv_graphics_pipeline_link(const struct radv_device *device, struct radv_graphics_pipeline *pipeline, + const struct radv_pipeline_key *pipeline_key, struct radv_pipeline_stage *stages) { const bool noop_fs = radv_pipeline_needs_noop_fs(pipeline, pipeline_key); @@ -1813,8 +1740,8 @@ radv_graphics_pipeline_link(const struct radv_device *device, } struct radv_ps_epilog_key -radv_generate_ps_epilog_key(const struct radv_device *device, - const struct radv_ps_epilog_state *state, bool disable_mrt_compaction) +radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ps_epilog_state *state, + bool disable_mrt_compaction) { unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0; struct radv_ps_epilog_key key; @@ -1830,8 +1757,7 @@ radv_generate_ps_epilog_key(const struct radv_device *device, } else { bool blend_enable = state->color_blend_enable & (0xfu << (i * 4)); - cf = radv_choose_spi_color_format(device, fmt, blend_enable, - state->need_src_alpha & (1 << i)); + cf = radv_choose_spi_color_format(device, fmt, blend_enable, state->need_src_alpha & (1 << i)); if (format_is_int8(fmt)) is_int8 |= 1 << i; @@ -1880,8 +1806,7 @@ radv_generate_ps_epilog_key(const struct radv_device *device, } static struct radv_ps_epilog_key -radv_pipeline_generate_ps_epilog_key(const struct radv_device *device, - const struct vk_graphics_pipeline_state *state, +radv_pipeline_generate_ps_epilog_key(const struct radv_device *device, const struct vk_graphics_pipeline_state *state, 
bool disable_mrt_compaction) { struct radv_ps_epilog_state ps_epilog = {0}; @@ -1921,10 +1846,8 @@ radv_pipeline_generate_ps_epilog_key(const struct radv_device *device, } if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA || - srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || - dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || - srcRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA || - dstRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA) + srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || + srcRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA || dstRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA) ps_epilog.need_src_alpha |= 1 << i; } } @@ -1941,15 +1864,13 @@ radv_pipeline_generate_ps_epilog_key(const struct radv_device *device, } static struct radv_pipeline_key -radv_generate_graphics_pipeline_key(const struct radv_device *device, - const struct radv_graphics_pipeline *pipeline, +radv_generate_graphics_pipeline_key(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct vk_graphics_pipeline_state *state, VkGraphicsPipelineLibraryFlagBitsEXT lib_flags) { const struct radv_physical_device *pdevice = device->physical_device; - struct radv_pipeline_key key = - radv_generate_pipeline_key(device, &pipeline->base, pCreateInfo->flags); + struct radv_pipeline_key key = radv_generate_pipeline_key(device, &pipeline->base, pCreateInfo->flags); key.lib_flags = lib_flags; key.has_multiview_view_index = state->rp ? !!state->rp->view_mask : 0; @@ -1997,15 +1918,13 @@ radv_generate_graphics_pipeline_key(const struct radv_device *device, const struct ac_vtx_format_info *vtx_info = ac_get_vtx_format_info(pdevice->rad_info.gfx_level, pdevice->rad_info.family, format); - unsigned attrib_align = - vtx_info->chan_byte_size ? vtx_info->chan_byte_size : vtx_info->element_size; + unsigned attrib_align = vtx_info->chan_byte_size ? vtx_info->chan_byte_size : vtx_info->element_size; /* If offset is misaligned, then the buffer offset must be too. Just skip updating * vertex_binding_align in this case. 
*/ if (offset % attrib_align == 0) { - key.vs.vertex_binding_align[binding] = - MAX2(key.vs.vertex_binding_align[binding], attrib_align); + key.vs.vertex_binding_align[binding] = MAX2(key.vs.vertex_binding_align[binding], attrib_align); } } } @@ -2015,8 +1934,7 @@ radv_generate_graphics_pipeline_key(const struct radv_device *device, if (state->ms) { key.ps.sample_shading_enable = state->ms->sample_shading_enable; - if (!(pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZATION_SAMPLES) && - state->ms->rasterization_samples > 1) { + if (!(pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZATION_SAMPLES) && state->ms->rasterization_samples > 1) { key.ps.num_samples = state->ms->rasterization_samples; } } @@ -2036,8 +1954,7 @@ radv_generate_graphics_pipeline_key(const struct radv_device *device, } if (device->physical_device->rad_info.gfx_level >= GFX10 && state->rs) { - key.vs.provoking_vtx_last = - state->rs->provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT; + key.vs.provoking_vtx_last = state->rs->provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT; } if (device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE) @@ -2069,29 +1986,24 @@ radv_generate_graphics_pipeline_key(const struct radv_device *device, * - it's compiled without the fragment output interface with GPL * - it's compiled on-demand because some dynamic states are enabled */ - key.ps.has_epilog = - (pipeline->active_stages & VK_SHADER_STAGE_FRAGMENT_BIT) && - (((lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) && - !(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)) || - key.ps.dynamic_ps_epilog); + key.ps.has_epilog = (pipeline->active_stages & VK_SHADER_STAGE_FRAGMENT_BIT) && + (((lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) && + !(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)) || + key.ps.dynamic_ps_epilog); /* Disable MRT compaction when it's not possible to know both the written color outputs and the * color blend attachments. 
*/ bool disable_mrt_compaction = - key.ps.has_epilog || - ((lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) && - !(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)); + key.ps.has_epilog || ((lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) && + !(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)); - key.ps.epilog = - radv_pipeline_generate_ps_epilog_key(device, state, disable_mrt_compaction); + key.ps.epilog = radv_pipeline_generate_ps_epilog_key(device, state, disable_mrt_compaction); - key.dynamic_patch_control_points = - !!(pipeline->dynamic_states & RADV_DYNAMIC_PATCH_CONTROL_POINTS); + key.dynamic_patch_control_points = !!(pipeline->dynamic_states & RADV_DYNAMIC_PATCH_CONTROL_POINTS); - key.dynamic_rasterization_samples = - !!(pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZATION_SAMPLES) || - (!!(pipeline->active_stages & VK_SHADER_STAGE_FRAGMENT_BIT) && !state->ms); + key.dynamic_rasterization_samples = !!(pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZATION_SAMPLES) || + (!!(pipeline->active_stages & VK_SHADER_STAGE_FRAGMENT_BIT) && !state->ms); key.dynamic_color_write_mask = !!(pipeline->dynamic_states & RADV_DYNAMIC_COLOR_WRITE_MASK); @@ -2138,8 +2050,7 @@ radv_generate_graphics_pipeline_key(const struct radv_device *device, static void radv_fill_shader_info_ngg(struct radv_device *device, struct radv_graphics_pipeline *pipeline, - const struct radv_pipeline_key *pipeline_key, - struct radv_pipeline_stage *stages) + const struct radv_pipeline_key *pipeline_key, struct radv_pipeline_stage *stages) { if (pipeline_key->use_ngg) { if (stages[MESA_SHADER_TESS_CTRL].nir) { @@ -2150,8 +2061,8 @@ radv_fill_shader_info_ngg(struct radv_device *device, struct radv_graphics_pipel stages[MESA_SHADER_MESH].info.is_ngg = true; } - if (device->physical_device->rad_info.gfx_level < GFX11 && - stages[MESA_SHADER_TESS_CTRL].nir && stages[MESA_SHADER_GEOMETRY].nir && + if (device->physical_device->rad_info.gfx_level < GFX11 && stages[MESA_SHADER_TESS_CTRL].nir && + stages[MESA_SHADER_GEOMETRY].nir && stages[MESA_SHADER_GEOMETRY].nir->info.gs.invocations * stages[MESA_SHADER_GEOMETRY].nir->info.gs.vertices_out > 256) { @@ -2163,8 +2074,7 @@ radv_fill_shader_info_ngg(struct radv_device *device, struct radv_graphics_pipel stages[MESA_SHADER_TESS_EVAL].info.is_ngg = false; } - bool uses_xfb = pipeline->last_vgt_api_stage != MESA_SHADER_NONE && - stages[pipeline->last_vgt_api_stage].nir && + bool uses_xfb = pipeline->last_vgt_api_stage != MESA_SHADER_NONE && stages[pipeline->last_vgt_api_stage].nir && stages[pipeline->last_vgt_api_stage].nir->xfb_info; if (!device->physical_device->use_ngg_streamout && uses_xfb) { @@ -2187,15 +2097,13 @@ radv_fill_shader_info_ngg(struct radv_device *device, struct radv_graphics_pipel } static bool -radv_consider_force_vrs(const struct radv_device *device, - const struct radv_graphics_pipeline *pipeline, +radv_consider_force_vrs(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, const struct radv_pipeline_stage *stages) { if (!device->force_vrs_enabled) return false; - if (pipeline->last_vgt_api_stage != MESA_SHADER_VERTEX && - pipeline->last_vgt_api_stage != MESA_SHADER_TESS_EVAL && + if (pipeline->last_vgt_api_stage != MESA_SHADER_VERTEX && pipeline->last_vgt_api_stage != MESA_SHADER_TESS_EVAL && pipeline->last_vgt_api_stage != MESA_SHADER_GEOMETRY) return false; @@ -2250,19 +2158,17 @@ radv_get_next_stage(gl_shader_stage stage, VkShaderStageFlagBits 
active_nir_stag static void radv_fill_shader_info(struct radv_device *device, struct radv_graphics_pipeline *pipeline, - struct radv_pipeline_layout *pipeline_layout, - const struct radv_pipeline_key *pipeline_key, - struct radv_pipeline_stage *stages, - VkShaderStageFlagBits active_nir_stages) + struct radv_pipeline_layout *pipeline_layout, const struct radv_pipeline_key *pipeline_key, + struct radv_pipeline_stage *stages, VkShaderStageFlagBits active_nir_stages) { bool consider_force_vrs = radv_consider_force_vrs(device, pipeline, stages); - radv_foreach_stage (i, active_nir_stages) { + radv_foreach_stage(i, active_nir_stages) + { gl_shader_stage next_stage = radv_get_next_stage(i, active_nir_stages); - radv_nir_shader_info_pass( - device, stages[i].nir, next_stage, pipeline_layout, pipeline_key, pipeline->base.type, - i == pipeline->last_vgt_api_stage && consider_force_vrs, &stages[i].info); + radv_nir_shader_info_pass(device, stages[i].nir, next_stage, pipeline_layout, pipeline_key, pipeline->base.type, + i == pipeline->last_vgt_api_stage && consider_force_vrs, &stages[i].info); } radv_nir_shader_info_link(device, pipeline_key, stages); @@ -2270,22 +2176,18 @@ radv_fill_shader_info(struct radv_device *device, struct radv_graphics_pipeline static void radv_declare_pipeline_args(struct radv_device *device, struct radv_pipeline_stage *stages, - const struct radv_pipeline_key *pipeline_key, - VkShaderStageFlagBits active_nir_stages) + const struct radv_pipeline_key *pipeline_key, VkShaderStageFlagBits active_nir_stages) { enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level; if (gfx_level >= GFX9 && stages[MESA_SHADER_TESS_CTRL].nir) { - radv_declare_shader_args(device, pipeline_key, &stages[MESA_SHADER_TESS_CTRL].info, - MESA_SHADER_TESS_CTRL, MESA_SHADER_VERTEX, RADV_SHADER_TYPE_DEFAULT, - &stages[MESA_SHADER_TESS_CTRL].args); - stages[MESA_SHADER_TESS_CTRL].info.user_sgprs_locs = - stages[MESA_SHADER_TESS_CTRL].args.user_sgprs_locs; + radv_declare_shader_args(device, pipeline_key, &stages[MESA_SHADER_TESS_CTRL].info, MESA_SHADER_TESS_CTRL, + MESA_SHADER_VERTEX, RADV_SHADER_TYPE_DEFAULT, &stages[MESA_SHADER_TESS_CTRL].args); + stages[MESA_SHADER_TESS_CTRL].info.user_sgprs_locs = stages[MESA_SHADER_TESS_CTRL].args.user_sgprs_locs; stages[MESA_SHADER_TESS_CTRL].info.inline_push_constant_mask = stages[MESA_SHADER_TESS_CTRL].args.ac.inline_push_const_mask; - stages[MESA_SHADER_VERTEX].info.user_sgprs_locs = - stages[MESA_SHADER_TESS_CTRL].info.user_sgprs_locs; + stages[MESA_SHADER_VERTEX].info.user_sgprs_locs = stages[MESA_SHADER_TESS_CTRL].info.user_sgprs_locs; stages[MESA_SHADER_VERTEX].info.inline_push_constant_mask = stages[MESA_SHADER_TESS_CTRL].info.inline_push_constant_mask; stages[MESA_SHADER_VERTEX].args = stages[MESA_SHADER_TESS_CTRL].args; @@ -2295,27 +2197,23 @@ radv_declare_pipeline_args(struct radv_device *device, struct radv_pipeline_stag } if (gfx_level >= GFX9 && stages[MESA_SHADER_GEOMETRY].nir) { - gl_shader_stage pre_stage = - stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; - radv_declare_shader_args(device, pipeline_key, &stages[MESA_SHADER_GEOMETRY].info, - MESA_SHADER_GEOMETRY, pre_stage, RADV_SHADER_TYPE_DEFAULT, - &stages[MESA_SHADER_GEOMETRY].args); - stages[MESA_SHADER_GEOMETRY].info.user_sgprs_locs = - stages[MESA_SHADER_GEOMETRY].args.user_sgprs_locs; + gl_shader_stage pre_stage = stages[MESA_SHADER_TESS_EVAL].nir ? 
MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; + radv_declare_shader_args(device, pipeline_key, &stages[MESA_SHADER_GEOMETRY].info, MESA_SHADER_GEOMETRY, + pre_stage, RADV_SHADER_TYPE_DEFAULT, &stages[MESA_SHADER_GEOMETRY].args); + stages[MESA_SHADER_GEOMETRY].info.user_sgprs_locs = stages[MESA_SHADER_GEOMETRY].args.user_sgprs_locs; stages[MESA_SHADER_GEOMETRY].info.inline_push_constant_mask = stages[MESA_SHADER_GEOMETRY].args.ac.inline_push_const_mask; stages[pre_stage].info.user_sgprs_locs = stages[MESA_SHADER_GEOMETRY].info.user_sgprs_locs; - stages[pre_stage].info.inline_push_constant_mask = - stages[MESA_SHADER_GEOMETRY].info.inline_push_constant_mask; + stages[pre_stage].info.inline_push_constant_mask = stages[MESA_SHADER_GEOMETRY].info.inline_push_constant_mask; stages[pre_stage].args = stages[MESA_SHADER_GEOMETRY].args; active_nir_stages &= ~(1 << pre_stage); active_nir_stages &= ~(1 << MESA_SHADER_GEOMETRY); } u_foreach_bit (i, active_nir_stages) { - radv_declare_shader_args(device, pipeline_key, &stages[i].info, i, MESA_SHADER_NONE, - RADV_SHADER_TYPE_DEFAULT, &stages[i].args); + radv_declare_shader_args(device, pipeline_key, &stages[i].info, i, MESA_SHADER_NONE, RADV_SHADER_TYPE_DEFAULT, + &stages[i].args); stages[i].info.user_sgprs_locs = stages[i].args.user_sgprs_locs; stages[i].info.inline_push_constant_mask = stages[i].args.ac.inline_push_const_mask; } @@ -2323,12 +2221,10 @@ radv_declare_pipeline_args(struct radv_device *device, struct radv_pipeline_stag static struct radv_shader * radv_pipeline_create_gs_copy_shader(struct radv_device *device, struct radv_pipeline *pipeline, - struct vk_pipeline_cache *cache, - struct radv_pipeline_stage *stages, + struct vk_pipeline_cache *cache, struct radv_pipeline_stage *stages, const struct radv_pipeline_key *pipeline_key, - const struct radv_pipeline_layout *pipeline_layout, - bool keep_executable_info, bool keep_statistic_info, - struct radv_shader_binary **gs_copy_binary) + const struct radv_pipeline_layout *pipeline_layout, bool keep_executable_info, + bool keep_statistic_info, struct radv_shader_binary **gs_copy_binary) { const struct radv_shader_info *gs_info = &stages[MESA_SHADER_GEOMETRY].info; ac_nir_gs_output_info output_info = { @@ -2337,9 +2233,8 @@ radv_pipeline_create_gs_copy_shader(struct radv_device *device, struct radv_pipe }; nir_shader *nir = ac_nir_create_gs_copy_shader( stages[MESA_SHADER_GEOMETRY].nir, device->physical_device->rad_info.gfx_level, - gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask, - gs_info->outinfo.vs_output_param_offset, gs_info->outinfo.param_exports, false, false, - gs_info->force_vrs_per_vertex, &output_info); + gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask, gs_info->outinfo.vs_output_param_offset, + gs_info->outinfo.param_exports, false, false, gs_info->force_vrs_per_vertex, &output_info); nir_validate_shader(nir, "after ac_nir_create_gs_copy_shader"); nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); @@ -2349,22 +2244,21 @@ radv_pipeline_create_gs_copy_shader(struct radv_device *device, struct radv_pipe .shader_sha1 = {0}, }; radv_nir_shader_info_init(&gs_copy_stage.info); - radv_nir_shader_info_pass(device, nir, MESA_SHADER_FRAGMENT, pipeline_layout, pipeline_key, - pipeline->type, false, &gs_copy_stage.info); + radv_nir_shader_info_pass(device, nir, MESA_SHADER_FRAGMENT, pipeline_layout, pipeline_key, pipeline->type, false, + &gs_copy_stage.info); gs_copy_stage.info.wave_size = 64; /* Wave32 not supported. 
*/ gs_copy_stage.info.workgroup_size = 64; /* HW VS: separate waves, no workgroups */ gs_copy_stage.info.so = gs_info->so; gs_copy_stage.info.outinfo = gs_info->outinfo; gs_copy_stage.info.force_vrs_per_vertex = gs_info->force_vrs_per_vertex; - radv_declare_shader_args(device, pipeline_key, &gs_copy_stage.info, MESA_SHADER_VERTEX, - MESA_SHADER_NONE, RADV_SHADER_TYPE_GS_COPY, &gs_copy_stage.args); + radv_declare_shader_args(device, pipeline_key, &gs_copy_stage.info, MESA_SHADER_VERTEX, MESA_SHADER_NONE, + RADV_SHADER_TYPE_GS_COPY, &gs_copy_stage.args); gs_copy_stage.info.user_sgprs_locs = gs_copy_stage.args.user_sgprs_locs; gs_copy_stage.info.inline_push_constant_mask = gs_copy_stage.args.ac.inline_push_const_mask; - NIR_PASS_V(nir, radv_nir_lower_abi, device->physical_device->rad_info.gfx_level, - &gs_copy_stage.info, &gs_copy_stage.args, pipeline_key, - device->physical_device->rad_info.address32_hi); + NIR_PASS_V(nir, radv_nir_lower_abi, device->physical_device->rad_info.gfx_level, &gs_copy_stage.info, + &gs_copy_stage.args, pipeline_key, device->physical_device->rad_info.address32_hi); struct radv_pipeline_key key = { .optimisations_disabled = pipeline_key->optimisations_disabled, @@ -2378,11 +2272,9 @@ static void radv_pipeline_nir_to_asm(struct radv_device *device, struct radv_graphics_pipeline *pipeline, struct vk_pipeline_cache *cache, struct radv_pipeline_stage *stages, const struct radv_pipeline_key *pipeline_key, - const struct radv_pipeline_layout *pipeline_layout, - bool keep_executable_info, bool keep_statistic_info, - VkShaderStageFlagBits active_nir_stages, - struct radv_shader_binary **binaries, - struct radv_shader_binary **gs_copy_binary) + const struct radv_pipeline_layout *pipeline_layout, bool keep_executable_info, + bool keep_statistic_info, VkShaderStageFlagBits active_nir_stages, + struct radv_shader_binary **binaries, struct radv_shader_binary **gs_copy_binary) { for (int s = MESA_VULKAN_SHADER_STAGES - 1; s >= 0; s--) { if (!(active_nir_stages & (1 << s)) || pipeline->base.shaders[s]) @@ -2409,14 +2301,13 @@ radv_pipeline_nir_to_asm(struct radv_device *device, struct radv_graphics_pipeli int64_t stage_start = os_time_get_nano(); - pipeline->base.shaders[s] = - radv_shader_nir_to_asm(device, cache, &stages[s], shaders, shader_count, pipeline_key, - keep_executable_info, keep_statistic_info, &binaries[s]); + pipeline->base.shaders[s] = radv_shader_nir_to_asm(device, cache, &stages[s], shaders, shader_count, pipeline_key, + keep_executable_info, keep_statistic_info, &binaries[s]); if (s == MESA_SHADER_GEOMETRY && !stages[s].info.is_ngg) { - pipeline->base.gs_copy_shader = radv_pipeline_create_gs_copy_shader( - device, &pipeline->base, cache, stages, pipeline_key, pipeline_layout, - keep_executable_info, keep_statistic_info, gs_copy_binary); + pipeline->base.gs_copy_shader = + radv_pipeline_create_gs_copy_shader(device, &pipeline->base, cache, stages, pipeline_key, pipeline_layout, + keep_executable_info, keep_statistic_info, gs_copy_binary); } stages[s].feedback.duration += os_time_get_nano() - stage_start; @@ -2429,8 +2320,7 @@ radv_pipeline_nir_to_asm(struct radv_device *device, struct radv_graphics_pipeli static void radv_pipeline_get_nir(struct radv_device *device, struct radv_graphics_pipeline *pipeline, - struct radv_pipeline_stage *stages, - const struct radv_pipeline_key *pipeline_key) + struct radv_pipeline_stage *stages, const struct radv_pipeline_key *pipeline_key) { for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) { if (!stages[s].entrypoint) @@ 
-2440,8 +2330,7 @@ radv_pipeline_get_nir(struct radv_device *device, struct radv_graphics_pipeline /* NIR might already have been imported from a library. */ if (!stages[s].nir) { - stages[s].nir = - radv_shader_spirv_to_nir(device, &stages[s], pipeline_key, pipeline->base.is_internal); + stages[s].nir = radv_shader_spirv_to_nir(device, &stages[s], pipeline_key, pipeline->base.is_internal); } stages[s].feedback.duration += os_time_get_nano() - stage_start; @@ -2449,8 +2338,7 @@ radv_pipeline_get_nir(struct radv_device *device, struct radv_graphics_pipeline } static void -radv_pipeline_retain_shaders(struct radv_graphics_lib_pipeline *gfx_pipeline_lib, - struct radv_pipeline_stage *stages) +radv_pipeline_retain_shaders(struct radv_graphics_lib_pipeline *gfx_pipeline_lib, struct radv_pipeline_stage *stages) { for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) { if (!stages[s].entrypoint) @@ -2466,18 +2354,15 @@ radv_pipeline_retain_shaders(struct radv_graphics_lib_pipeline *gfx_pipeline_lib blob_finish_get_buffer(&blob, &gfx_pipeline_lib->retained_shaders[s].serialized_nir, &gfx_pipeline_lib->retained_shaders[s].serialized_nir_size); - memcpy(gfx_pipeline_lib->retained_shaders[s].shader_sha1, stages[s].shader_sha1, - sizeof(stages[s].shader_sha1)); + memcpy(gfx_pipeline_lib->retained_shaders[s].shader_sha1, stages[s].shader_sha1, sizeof(stages[s].shader_sha1)); stages[s].feedback.duration += os_time_get_nano() - stage_start; } } static void -radv_pipeline_import_retained_shaders(const struct radv_device *device, - struct radv_graphics_pipeline *pipeline, - struct radv_graphics_lib_pipeline *lib, - struct radv_pipeline_stage *stages) +radv_pipeline_import_retained_shaders(const struct radv_device *device, struct radv_graphics_pipeline *pipeline, + struct radv_graphics_lib_pipeline *lib, struct radv_pipeline_stage *stages) { /* Import the stages (SPIR-V only in case of cache hits). */ for (uint32_t i = 0; i < lib->stage_count; i++) { @@ -2507,8 +2392,7 @@ radv_pipeline_import_retained_shaders(const struct radv_device *device, stages[s].stage = s; stages[s].nir = nir_deserialize(NULL, options, &blob_reader); stages[s].entrypoint = nir_shader_get_entrypoint(stages[s].nir)->function->name; - memcpy(stages[s].shader_sha1, lib->retained_shaders[s].shader_sha1, - sizeof(stages[s].shader_sha1)); + memcpy(stages[s].shader_sha1, lib->retained_shaders[s].shader_sha1, sizeof(stages[s].shader_sha1)); stages[s].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; @@ -2517,15 +2401,12 @@ radv_pipeline_import_retained_shaders(const struct radv_device *device, } static void -radv_pipeline_load_retained_shaders(const struct radv_device *device, - struct radv_graphics_pipeline *pipeline, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - struct radv_pipeline_stage *stages) +radv_pipeline_load_retained_shaders(const struct radv_device *device, struct radv_graphics_pipeline *pipeline, + const VkGraphicsPipelineCreateInfo *pCreateInfo, struct radv_pipeline_stage *stages) { const VkPipelineLibraryCreateInfoKHR *libs_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR); - const bool link_optimize = - (pCreateInfo->flags & VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT) != 0; + const bool link_optimize = (pCreateInfo->flags & VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT) != 0; /* Nothing to load if no libs are imported. 
*/ if (!libs_info) @@ -2537,8 +2418,7 @@ radv_pipeline_load_retained_shaders(const struct radv_device *device, for (uint32_t i = 0; i < libs_info->libraryCount; i++) { RADV_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]); - struct radv_graphics_lib_pipeline *gfx_pipeline_lib = - radv_pipeline_to_graphics_lib(pipeline_lib); + struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib); radv_pipeline_import_retained_shaders(device, pipeline, gfx_pipeline_lib, stages); } @@ -2558,18 +2438,15 @@ radv_pipeline_create_ps_epilog(struct radv_device *device, struct radv_graphics_ if (pipeline->base.type == RADV_PIPELINE_GRAPHICS) { needs_ps_epilog = pipeline->base.shaders[MESA_SHADER_FRAGMENT] && - pipeline->base.shaders[MESA_SHADER_FRAGMENT]->info.ps.has_epilog && - !pipeline->ps_epilog; + pipeline->base.shaders[MESA_SHADER_FRAGMENT]->info.ps.has_epilog && !pipeline->ps_epilog; } else { assert(pipeline->base.type == RADV_PIPELINE_GRAPHICS_LIB); - needs_ps_epilog = - (lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) && - !(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT); + needs_ps_epilog = (lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) && + !(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT); } if (needs_ps_epilog) { - pipeline->ps_epilog = - radv_create_ps_epilog(device, &pipeline_key->ps.epilog, ps_epilog_binary); + pipeline->ps_epilog = radv_create_ps_epilog(device, &pipeline_key->ps.epilog, ps_epilog_binary); if (!pipeline->ps_epilog) return false; } @@ -2578,8 +2455,7 @@ radv_pipeline_create_ps_epilog(struct radv_device *device, struct radv_graphics_ } static unsigned -radv_get_rasterization_prim(const struct radv_pipeline_stage *stages, - const struct radv_pipeline_key *pipeline_key) +radv_get_rasterization_prim(const struct radv_pipeline_stage *stages, const struct radv_pipeline_key *pipeline_key) { unsigned rast_prim; @@ -2587,14 +2463,12 @@ radv_get_rasterization_prim(const struct radv_pipeline_stage *stages, return -1; if (stages[MESA_SHADER_GEOMETRY].nir) { - rast_prim = - si_conv_gl_prim_to_gs_out(stages[MESA_SHADER_GEOMETRY].nir->info.gs.output_primitive); + rast_prim = si_conv_gl_prim_to_gs_out(stages[MESA_SHADER_GEOMETRY].nir->info.gs.output_primitive); } else if (stages[MESA_SHADER_TESS_EVAL].nir) { if (stages[MESA_SHADER_TESS_EVAL].nir->info.tess.point_mode) { rast_prim = V_028A6C_POINTLIST; } else { - rast_prim = - si_conv_tess_prim_to_gs_out(stages[MESA_SHADER_TESS_EVAL].nir->info.tess._primitive_mode); + rast_prim = si_conv_tess_prim_to_gs_out(stages[MESA_SHADER_TESS_EVAL].nir->info.tess._primitive_mode); } } else if (stages[MESA_SHADER_MESH].nir) { rast_prim = si_conv_gl_prim_to_gs_out(stages[MESA_SHADER_MESH].nir->info.mesh.primitive_type); @@ -2606,10 +2480,8 @@ radv_get_rasterization_prim(const struct radv_pipeline_stage *stages, } static bool -radv_skip_graphics_pipeline_compile(const struct radv_device *device, - const struct radv_graphics_pipeline *pipeline, - VkGraphicsPipelineLibraryFlagBitsEXT lib_flags, - bool fast_linking_enabled) +radv_skip_graphics_pipeline_compile(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, + VkGraphicsPipelineLibraryFlagBitsEXT lib_flags, bool fast_linking_enabled) { VkShaderStageFlagBits binary_stages = 0; @@ -2618,8 +2490,7 @@ radv_skip_graphics_pipeline_compile(const struct radv_device *device, return false; /* Do not skip when the PS epilog needs to be 
compiled. */ - if (!radv_pipeline_needs_dynamic_ps_epilog(pipeline) && - pipeline->base.shaders[MESA_SHADER_FRAGMENT] && + if (!radv_pipeline_needs_dynamic_ps_epilog(pipeline) && pipeline->base.shaders[MESA_SHADER_FRAGMENT] && pipeline->base.shaders[MESA_SHADER_FRAGMENT]->info.ps.has_epilog && !pipeline->ps_epilog) return false; @@ -2658,13 +2529,10 @@ radv_skip_graphics_pipeline_compile(const struct radv_device *device, } static VkResult -radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - struct radv_pipeline_layout *pipeline_layout, - struct radv_device *device, struct vk_pipeline_cache *cache, - const struct radv_pipeline_key *pipeline_key, - VkGraphicsPipelineLibraryFlagBitsEXT lib_flags, - bool fast_linking_enabled) +radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo, + struct radv_pipeline_layout *pipeline_layout, struct radv_device *device, + struct vk_pipeline_cache *cache, const struct radv_pipeline_key *pipeline_key, + VkGraphicsPipelineLibraryFlagBitsEXT lib_flags, bool fast_linking_enabled) { struct radv_shader_binary *binaries[MESA_VULKAN_SHADER_STAGES] = {NULL}; struct radv_shader_binary *gs_copy_binary = NULL; @@ -2680,8 +2548,7 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, }; bool skip_shaders_cache = false; VkResult result = VK_SUCCESS; - const bool retain_shaders = - !!(pCreateInfo->flags & VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT); + const bool retain_shaders = !!(pCreateInfo->flags & VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT); int64_t pipeline_start = os_time_get_nano(); @@ -2729,22 +2596,20 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, } bool found_in_application_cache = true; - if (!skip_shaders_cache && radv_pipeline_cache_search(device, cache, &pipeline->base, hash, - &found_in_application_cache)) { + if (!skip_shaders_cache && + radv_pipeline_cache_search(device, cache, &pipeline->base, hash, &found_in_application_cache)) { if (found_in_application_cache) - pipeline_feedback.flags |= - VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; + pipeline_feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; if (retain_shaders) { /* For graphics pipeline libraries created with the RETAIN_LINK_TIME_OPTIMIZATION flag, we * need to retain the stage info because we can't know if the LTO pipelines will * be find in the shaders cache. 
*/ - struct radv_graphics_lib_pipeline *gfx_pipeline_lib = - radv_pipeline_to_graphics_lib(&pipeline->base); + struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(&pipeline->base); - gfx_pipeline_lib->stages = radv_copy_shader_stage_create_info( - device, pCreateInfo->stageCount, pCreateInfo->pStages, gfx_pipeline_lib->mem_ctx); + gfx_pipeline_lib->stages = radv_copy_shader_stage_create_info(device, pCreateInfo->stageCount, + pCreateInfo->pStages, gfx_pipeline_lib->mem_ctx); if (!gfx_pipeline_lib->stages) return VK_ERROR_OUT_OF_HOST_MEMORY; @@ -2772,7 +2637,8 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, bool optimize_conservatively = pipeline_key->optimisations_disabled; - radv_foreach_stage (i, active_nir_stages) { + radv_foreach_stage(i, active_nir_stages) + { radv_nir_shader_info_init(&stages[i].info); } @@ -2780,8 +2646,7 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, radv_fill_shader_info_ngg(device, pipeline, pipeline_key, stages); if (stages[MESA_SHADER_GEOMETRY].nir) { - gl_shader_stage pre_stage = - stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; + gl_shader_stage pre_stage = stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; unsigned nir_gs_flags = nir_lower_gs_intrinsics_per_stream; if (stages[pre_stage].info.is_ngg) { @@ -2798,11 +2663,11 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, if (stages[MESA_SHADER_FRAGMENT].nir) { unsigned rast_prim = radv_get_rasterization_prim(stages, pipeline_key); - NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_fs_barycentric, pipeline_key, - rast_prim); + NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_fs_barycentric, pipeline_key, rast_prim); } - radv_foreach_stage (i, active_nir_stages) { + radv_foreach_stage(i, active_nir_stages) + { int64_t stage_start = os_time_get_nano(); radv_optimize_nir(stages[i].nir, optimize_conservatively); @@ -2818,16 +2683,15 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, radv_nir_lower_poly_line_smooth(stages[MESA_SHADER_FRAGMENT].nir, pipeline_key); } - radv_fill_shader_info(device, pipeline, pipeline_layout, pipeline_key, stages, - active_nir_stages); + radv_fill_shader_info(device, pipeline, pipeline_layout, pipeline_key, stages, active_nir_stages); radv_declare_pipeline_args(device, stages, pipeline_key, active_nir_stages); - radv_foreach_stage (i, active_nir_stages) { + radv_foreach_stage(i, active_nir_stages) + { int64_t stage_start = os_time_get_nano(); - radv_postprocess_nir(device, pipeline_layout, pipeline_key, pipeline->last_vgt_api_stage, - &stages[i]); + radv_postprocess_nir(device, pipeline_layout, pipeline_key, pipeline->last_vgt_api_stage, &stages[i]); stages[i].feedback.duration += os_time_get_nano() - stage_start; @@ -2836,9 +2700,8 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, } /* Compile NIR shaders to AMD assembly. 
*/ - radv_pipeline_nir_to_asm(device, pipeline, cache, stages, pipeline_key, pipeline_layout, - keep_executable_info, keep_statistic_info, active_nir_stages, binaries, - &gs_copy_binary); + radv_pipeline_nir_to_asm(device, pipeline, cache, stages, pipeline_key, pipeline_layout, keep_executable_info, + keep_statistic_info, active_nir_stages, binaries, &gs_copy_binary); if (!radv_pipeline_create_ps_epilog(device, pipeline, pipeline_key, lib_flags, &ps_epilog_binary)) return VK_ERROR_OUT_OF_DEVICE_MEMORY; @@ -2898,15 +2761,14 @@ done: if (libs_info) { for (uint32_t i = 0; i < libs_info->libraryCount; i++) { RADV_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]); - struct radv_graphics_lib_pipeline *gfx_pipeline_lib = - radv_pipeline_to_graphics_lib(pipeline_lib); + struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib); if (!gfx_pipeline_lib->base.active_stages) continue; - radv_foreach_stage(s, gfx_pipeline_lib->base.active_stages) { - creation_feedback->pPipelineStageCreationFeedbacks[num_feedbacks++] = - stages[s].feedback; + radv_foreach_stage(s, gfx_pipeline_lib->base.active_stages) + { + creation_feedback->pPipelineStageCreationFeedbacks[num_feedbacks++] = stages[s].feedback; } } } @@ -2919,8 +2781,7 @@ done: } static void -radv_pipeline_emit_blend_state(struct radeon_cmdbuf *ctx_cs, - const struct radv_graphics_pipeline *pipeline, +radv_pipeline_emit_blend_state(struct radeon_cmdbuf *ctx_cs, const struct radv_graphics_pipeline *pipeline, const struct radv_blend_state *blend) { struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; @@ -2962,8 +2823,8 @@ radv_pipeline_emit_vgt_gs_mode(const struct radv_device *device, struct radeon_c } static void -radv_emit_hw_vs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, - struct radeon_cmdbuf *cs, const struct radv_shader *shader) +radv_emit_hw_vs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, + const struct radv_shader *shader) { const struct radv_physical_device *pdevice = device->physical_device; uint64_t va = radv_shader_get_va(shader); @@ -2980,8 +2841,8 @@ radv_emit_hw_vs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, cull_dist_mask = outinfo->cull_dist_mask; total_mask = clip_dist_mask | cull_dist_mask; - bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer || - outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate; + bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_viewport_index || + outinfo->writes_primitive_shading_rate; unsigned spi_vs_out_config, nparams; /* VS is required to export at least one param. */ @@ -2997,12 +2858,9 @@ radv_emit_hw_vs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, radeon_set_context_reg( ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT, S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | - S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP - : V_02870C_SPI_SHADER_NONE) | - S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP - : V_02870C_SPI_SHADER_NONE) | - S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP - : V_02870C_SPI_SHADER_NONE)); + S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) | + S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ? 
V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) | + S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE)); radeon_set_context_reg( ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL, @@ -3011,23 +2869,22 @@ radv_emit_hw_vs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) | S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) | S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) | - S_02881C_VS_OUT_MISC_SIDE_BUS_ENA( - misc_vec_ena || (pdevice->rad_info.gfx_level >= GFX10_3 && outinfo->pos_exports > 1)) | + S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena || + (pdevice->rad_info.gfx_level >= GFX10_3 && outinfo->pos_exports > 1)) | S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) | - S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) | total_mask << 8 | - clip_dist_mask); + S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) | total_mask << 8 | clip_dist_mask); if (pdevice->rad_info.gfx_level <= GFX8) radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF, outinfo->writes_viewport_index); unsigned late_alloc_wave64, cu_mask; - ac_compute_late_alloc(&pdevice->rad_info, false, false, - shader->config.scratch_bytes_per_wave > 0, &late_alloc_wave64, &cu_mask); + ac_compute_late_alloc(&pdevice->rad_info, false, false, shader->config.scratch_bytes_per_wave > 0, + &late_alloc_wave64, &cu_mask); if (pdevice->rad_info.gfx_level >= GFX7) { - radeon_set_sh_reg_idx(pdevice, cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 3, - ac_apply_cu_en(S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F), - C_00B118_CU_EN, 0, &pdevice->rad_info)); + radeon_set_sh_reg_idx( + pdevice, cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 3, + ac_apply_cu_en(S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F), C_00B118_CU_EN, 0, &pdevice->rad_info)); radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64)); } if (pdevice->rad_info.gfx_level >= GFX10) { @@ -3059,14 +2916,12 @@ radv_emit_hw_ls(struct radeon_cmdbuf *cs, const struct radv_shader *shader) } static void -radv_emit_hw_ngg(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, - struct radeon_cmdbuf *cs, const struct radv_shader *es, - const struct radv_shader *shader) +radv_emit_hw_ngg(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, + const struct radv_shader *es, const struct radv_shader *shader) { const struct radv_physical_device *pdevice = device->physical_device; uint64_t va = radv_shader_get_va(shader); - gl_shader_stage es_type = - shader->info.stage == MESA_SHADER_GEOMETRY ? shader->info.gs.es_type : shader->info.stage; + gl_shader_stage es_type = shader->info.stage == MESA_SHADER_GEOMETRY ? 
shader->info.gs.es_type : shader->info.stage; const struct gfx10_ngg_info *ngg_state = &shader->info.ngg_info; radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); @@ -3081,8 +2936,8 @@ radv_emit_hw_ngg(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, cull_dist_mask = outinfo->cull_dist_mask; total_mask = clip_dist_mask | cull_dist_mask; - bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer || - outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate; + bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_viewport_index || + outinfo->writes_primitive_shading_rate; bool es_enable_prim_id = outinfo->export_prim_id || (es && es->info.uses_prim_id); bool break_wave_at_eoi = false; unsigned ge_cntl; @@ -3096,8 +2951,7 @@ radv_emit_hw_ngg(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, unsigned num_params = MAX2(outinfo->param_exports, 1); unsigned num_prim_params = outinfo->prim_param_exports; radeon_set_context_reg(ctx_cs, R_0286C4_SPI_VS_OUT_CONFIG, - S_0286C4_VS_EXPORT_COUNT(num_params - 1) | - S_0286C4_PRIM_EXPORT_COUNT(num_prim_params) | + S_0286C4_VS_EXPORT_COUNT(num_params - 1) | S_0286C4_PRIM_EXPORT_COUNT(num_prim_params) | S_0286C4_NO_PC_EXPORT(no_pc_export)); unsigned idx_format = V_028708_SPI_SHADER_1COMP; @@ -3105,17 +2959,13 @@ radv_emit_hw_ngg(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, outinfo->writes_primitive_shading_rate_per_primitive) idx_format = V_028708_SPI_SHADER_2COMP; - radeon_set_context_reg(ctx_cs, R_028708_SPI_SHADER_IDX_FORMAT, - S_028708_IDX0_EXPORT_FORMAT(idx_format)); + radeon_set_context_reg(ctx_cs, R_028708_SPI_SHADER_IDX_FORMAT, S_028708_IDX0_EXPORT_FORMAT(idx_format)); radeon_set_context_reg( ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT, S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | - S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP - : V_02870C_SPI_SHADER_NONE) | - S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP - : V_02870C_SPI_SHADER_NONE) | - S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP - : V_02870C_SPI_SHADER_NONE)); + S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) | + S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) | + S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ? 
V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE)); radeon_set_context_reg( ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL, @@ -3124,47 +2974,41 @@ radv_emit_hw_ngg(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) | S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) | S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) | - S_02881C_VS_OUT_MISC_SIDE_BUS_ENA( - misc_vec_ena || (pdevice->rad_info.gfx_level >= GFX10_3 && outinfo->pos_exports > 1)) | + S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena || + (pdevice->rad_info.gfx_level >= GFX10_3 && outinfo->pos_exports > 1)) | S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) | - S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) | total_mask << 8 | - clip_dist_mask); + S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) | total_mask << 8 | clip_dist_mask); - radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN, - S_028A84_PRIMITIVEID_EN(es_enable_prim_id) | - S_028A84_NGG_DISABLE_PROVOK_REUSE(outinfo->export_prim_id)); + radeon_set_context_reg( + ctx_cs, R_028A84_VGT_PRIMITIVEID_EN, + S_028A84_PRIMITIVEID_EN(es_enable_prim_id) | S_028A84_NGG_DISABLE_PROVOK_REUSE(outinfo->export_prim_id)); /* NGG specific registers. */ - uint32_t gs_num_invocations = - shader->info.stage == MESA_SHADER_GEOMETRY ? shader->info.gs.invocations : 1; + uint32_t gs_num_invocations = shader->info.stage == MESA_SHADER_GEOMETRY ? shader->info.gs.invocations : 1; if (pdevice->rad_info.gfx_level < GFX11) { - radeon_set_context_reg( - ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, - S_028A44_ES_VERTS_PER_SUBGRP(ngg_state->hw_max_esverts) | - S_028A44_GS_PRIMS_PER_SUBGRP(ngg_state->max_gsprims) | - S_028A44_GS_INST_PRIMS_IN_SUBGRP(ngg_state->max_gsprims * gs_num_invocations)); + radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, + S_028A44_ES_VERTS_PER_SUBGRP(ngg_state->hw_max_esverts) | + S_028A44_GS_PRIMS_PER_SUBGRP(ngg_state->max_gsprims) | + S_028A44_GS_INST_PRIMS_IN_SUBGRP(ngg_state->max_gsprims * gs_num_invocations)); } radeon_set_context_reg(ctx_cs, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP, S_0287FC_MAX_VERTS_PER_SUBGROUP(ngg_state->max_out_verts)); - radeon_set_context_reg(ctx_cs, R_028B4C_GE_NGG_SUBGRP_CNTL, - S_028B4C_PRIM_AMP_FACTOR(ngg_state->prim_amp_factor) | - S_028B4C_THDS_PER_SUBGRP(0)); /* for fast launch */ radeon_set_context_reg( - ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT, - S_028B90_CNT(gs_num_invocations) | S_028B90_ENABLE(gs_num_invocations > 1) | - S_028B90_EN_MAX_VERT_OUT_PER_GS_INSTANCE(ngg_state->max_vert_out_per_gs_instance)); + ctx_cs, R_028B4C_GE_NGG_SUBGRP_CNTL, + S_028B4C_PRIM_AMP_FACTOR(ngg_state->prim_amp_factor) | S_028B4C_THDS_PER_SUBGRP(0)); /* for fast launch */ + radeon_set_context_reg(ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT, + S_028B90_CNT(gs_num_invocations) | S_028B90_ENABLE(gs_num_invocations > 1) | + S_028B90_EN_MAX_VERT_OUT_PER_GS_INSTANCE(ngg_state->max_vert_out_per_gs_instance)); if (pdevice->rad_info.gfx_level >= GFX11) { ge_cntl = S_03096C_PRIMS_PER_SUBGRP(ngg_state->max_gsprims) | S_03096C_VERTS_PER_SUBGRP(ngg_state->hw_max_esverts) | - S_03096C_BREAK_PRIMGRP_AT_EOI(break_wave_at_eoi) | - S_03096C_PRIM_GRP_SIZE_GFX11(252); + S_03096C_BREAK_PRIMGRP_AT_EOI(break_wave_at_eoi) | S_03096C_PRIM_GRP_SIZE_GFX11(252); } else { ge_cntl = S_03096C_PRIM_GRP_SIZE_GFX10(ngg_state->max_gsprims) | - S_03096C_VERT_GRP_SIZE(ngg_state->hw_max_esverts) | - S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi); + S_03096C_VERT_GRP_SIZE(ngg_state->hw_max_esverts) | 
S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi); } /* Bug workaround for a possible hang with non-tessellation cases. @@ -3172,8 +3016,7 @@ radv_emit_hw_ngg(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, * * Requirement: GE_CNTL.VERT_GRP_SIZE = VGT_GS_ONCHIP_CNTL.ES_VERTS_PER_SUBGRP - 5 */ - if (pdevice->rad_info.gfx_level == GFX10 && es_type != MESA_SHADER_TESS_EVAL && - ngg_state->hw_max_esverts != 256) { + if (pdevice->rad_info.gfx_level == GFX10 && es_type != MESA_SHADER_TESS_EVAL && ngg_state->hw_max_esverts != 256) { ge_cntl &= C_03096C_VERT_GRP_SIZE; if (ngg_state->hw_max_esverts > 5) { @@ -3187,21 +3030,19 @@ radv_emit_hw_ngg(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, ac_compute_late_alloc(&pdevice->rad_info, true, shader->info.has_ngg_culling, shader->config.scratch_bytes_per_wave > 0, &late_alloc_wave64, &cu_mask); - radeon_set_sh_reg_idx(pdevice, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3, - ac_apply_cu_en(S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F), - C_00B21C_CU_EN, 0, &pdevice->rad_info)); + radeon_set_sh_reg_idx( + pdevice, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3, + ac_apply_cu_en(S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F), C_00B21C_CU_EN, 0, &pdevice->rad_info)); if (pdevice->rad_info.gfx_level >= GFX11) { radeon_set_sh_reg_idx( pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3, - ac_apply_cu_en( - S_00B204_CU_EN_GFX11(0x1) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64), - C_00B204_CU_EN_GFX11, 16, &pdevice->rad_info)); + ac_apply_cu_en(S_00B204_CU_EN_GFX11(0x1) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64), + C_00B204_CU_EN_GFX11, 16, &pdevice->rad_info)); } else { radeon_set_sh_reg_idx( pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3, - ac_apply_cu_en(S_00B204_CU_EN_GFX10(0xffff) | - S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64), + ac_apply_cu_en(S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64), C_00B204_CU_EN_GFX10, 16, &pdevice->rad_info)); } @@ -3221,8 +3062,7 @@ radv_emit_hw_ngg(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, } static void -radv_emit_hw_hs(const struct radv_device *device, struct radeon_cmdbuf *cs, - const struct radv_shader *shader) +radv_emit_hw_hs(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_shader *shader) { const struct radv_physical_device *pdevice = device->physical_device; uint64_t va = radv_shader_get_va(shader); @@ -3245,8 +3085,8 @@ radv_emit_hw_hs(const struct radv_device *device, struct radeon_cmdbuf *cs, } static void -radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, - struct radeon_cmdbuf *cs, const struct radv_shader *vs) +radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, + const struct radv_shader *vs) { if (vs->info.vs.as_ls) radv_emit_hw_ls(cs, vs); @@ -3259,15 +3099,14 @@ radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf * } static void -radv_emit_tess_ctrl_shader(const struct radv_device *device, struct radeon_cmdbuf *cs, - const struct radv_shader *tcs) +radv_emit_tess_ctrl_shader(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_shader *tcs) { radv_emit_hw_hs(device, cs, tcs); } static void -radv_emit_tess_eval_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, - struct radeon_cmdbuf *cs, const struct radv_shader *tes) +radv_emit_tess_eval_shader(const struct radv_device 
*device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, + const struct radv_shader *tes) { if (tes->info.is_ngg) { radv_emit_hw_ngg(device, ctx_cs, cs, NULL, tes); @@ -3279,8 +3118,8 @@ radv_emit_tess_eval_shader(const struct radv_device *device, struct radeon_cmdbu } static void -radv_emit_hw_gs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, - struct radeon_cmdbuf *cs, const struct radv_shader *gs) +radv_emit_hw_gs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, + const struct radv_shader *gs) { const struct radv_physical_device *pdevice = device->physical_device; const struct radv_legacy_gs_info *gs_state = &gs->info.gs_ring_info; @@ -3315,16 +3154,14 @@ radv_emit_hw_gs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, radeon_emit(ctx_cs, (max_stream >= 3) ? num_components[3] : 0); uint32_t gs_num_invocations = gs->info.gs.invocations; - radeon_set_context_reg( - ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT, - S_028B90_CNT(MIN2(gs_num_invocations, 127)) | S_028B90_ENABLE(gs_num_invocations > 0)); + radeon_set_context_reg(ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT, + S_028B90_CNT(MIN2(gs_num_invocations, 127)) | S_028B90_ENABLE(gs_num_invocations > 0)); if (pdevice->rad_info.gfx_level <= GFX8) { /* GFX6-8: ESGS offchip ring buffer is allocated according to VGT_ESGS_RING_ITEMSIZE. * GFX9+: Only used to set the GS input VGPRs, emulated in shaders. */ - radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, - gs_state->vgt_esgs_ring_itemsize); + radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, gs_state->vgt_esgs_ring_itemsize); } va = radv_shader_get_va(gs); @@ -3341,8 +3178,7 @@ radv_emit_hw_gs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, radeon_emit(cs, gs->config.rsrc2 | S_00B22C_LDS_SIZE(gs_state->lds_size)); radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs_state->vgt_gs_onchip_cntl); - radeon_set_context_reg(ctx_cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, - gs_state->vgt_gs_max_prims_per_subgroup); + radeon_set_context_reg(ctx_cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, gs_state->vgt_gs_max_prims_per_subgroup); } else { radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4); radeon_emit(cs, va >> 8); @@ -3351,22 +3187,21 @@ radv_emit_hw_gs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, radeon_emit(cs, gs->config.rsrc2); } - radeon_set_sh_reg_idx(pdevice, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3, - ac_apply_cu_en(S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F), - C_00B21C_CU_EN, 0, &pdevice->rad_info)); + radeon_set_sh_reg_idx( + pdevice, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3, + ac_apply_cu_en(S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F), C_00B21C_CU_EN, 0, &pdevice->rad_info)); if (pdevice->rad_info.gfx_level >= GFX10) { - radeon_set_sh_reg_idx( - pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3, - ac_apply_cu_en(S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0), - C_00B204_CU_EN_GFX10, 16, &pdevice->rad_info)); + radeon_set_sh_reg_idx(pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3, + ac_apply_cu_en(S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0), + C_00B204_CU_EN_GFX10, 16, &pdevice->rad_info)); } } static void -radv_emit_geometry_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, - struct radeon_cmdbuf *cs, const struct radv_shader *gs, - const struct radv_shader *es, const struct radv_shader *gs_copy_shader) +radv_emit_geometry_shader(const struct radv_device 
*device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, + const struct radv_shader *gs, const struct radv_shader *es, + const struct radv_shader *gs_copy_shader) { if (gs->info.is_ngg) { radv_emit_hw_ngg(device, ctx_cs, cs, es, gs); @@ -3379,20 +3214,18 @@ radv_emit_geometry_shader(const struct radv_device *device, struct radeon_cmdbuf } static void -radv_emit_mesh_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, - struct radeon_cmdbuf *cs, const struct radv_shader *ms) +radv_emit_mesh_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, + const struct radv_shader *ms) { const struct radv_physical_device *pdevice = device->physical_device; radv_emit_hw_ngg(device, ctx_cs, cs, NULL, ms); radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, ms->info.workgroup_size); - radeon_set_uconfig_reg_idx(pdevice, ctx_cs, R_030908_VGT_PRIMITIVE_TYPE, 1, - V_008958_DI_PT_POINTLIST); + radeon_set_uconfig_reg_idx(pdevice, ctx_cs, R_030908_VGT_PRIMITIVE_TYPE, 1, V_008958_DI_PT_POINTLIST); } static uint32_t -offset_to_ps_input(uint32_t offset, bool flat_shade, bool explicit, bool per_vertex, bool float16, - bool per_prim_gfx11) +offset_to_ps_input(uint32_t offset, bool flat_shade, bool explicit, bool per_vertex, bool float16, bool per_prim_gfx11) { uint32_t ps_input_cntl; if (offset <= AC_EXP_PARAM_OFFSET_31) { @@ -3418,9 +3251,8 @@ offset_to_ps_input(uint32_t offset, bool flat_shade, bool explicit, bool per_ver } static void -single_slot_to_ps_input(const struct radv_vs_output_info *outinfo, unsigned slot, - uint32_t *ps_input_cntl, unsigned *ps_offset, bool skip_undef, - bool use_default_0, bool flat_shade, bool per_prim_gfx11) +single_slot_to_ps_input(const struct radv_vs_output_info *outinfo, unsigned slot, uint32_t *ps_input_cntl, + unsigned *ps_offset, bool skip_undef, bool use_default_0, bool flat_shade, bool per_prim_gfx11) { unsigned vs_offset = outinfo->vs_output_param_offset[slot]; @@ -3433,15 +3265,13 @@ single_slot_to_ps_input(const struct radv_vs_output_info *outinfo, unsigned slot unreachable("vs_offset should not be AC_EXP_PARAM_UNDEFINED."); } - ps_input_cntl[*ps_offset] = - offset_to_ps_input(vs_offset, flat_shade, false, false, false, per_prim_gfx11); + ps_input_cntl[*ps_offset] = offset_to_ps_input(vs_offset, flat_shade, false, false, false, per_prim_gfx11); ++(*ps_offset); } static void -input_mask_to_ps_inputs(const struct radv_vs_output_info *outinfo, const struct radv_shader *ps, - uint32_t input_mask, uint32_t *ps_input_cntl, unsigned *ps_offset, - bool per_prim_gfx11) +input_mask_to_ps_inputs(const struct radv_vs_output_info *outinfo, const struct radv_shader *ps, uint32_t input_mask, + uint32_t *ps_input_cntl, unsigned *ps_offset, bool per_prim_gfx11) { u_foreach_bit (i, input_mask) { unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VAR0 + i]; @@ -3474,27 +3304,23 @@ radv_emit_ps_inputs(const struct radv_device *device, struct radeon_cmdbuf *ctx_ unsigned ps_offset = 0; if (ps->info.ps.prim_id_input && !mesh) - single_slot_to_ps_input(outinfo, VARYING_SLOT_PRIMITIVE_ID, ps_input_cntl, &ps_offset, true, - false, true, false); + single_slot_to_ps_input(outinfo, VARYING_SLOT_PRIMITIVE_ID, ps_input_cntl, &ps_offset, true, false, true, false); if (ps->info.ps.layer_input && !mesh) - single_slot_to_ps_input(outinfo, VARYING_SLOT_LAYER, ps_input_cntl, &ps_offset, false, true, - true, false); + single_slot_to_ps_input(outinfo, VARYING_SLOT_LAYER, ps_input_cntl, &ps_offset, false, true, 
true, false); if (ps->info.ps.viewport_index_input && !mesh) - single_slot_to_ps_input(outinfo, VARYING_SLOT_VIEWPORT, ps_input_cntl, &ps_offset, false, - true, true, false); + single_slot_to_ps_input(outinfo, VARYING_SLOT_VIEWPORT, ps_input_cntl, &ps_offset, false, true, true, false); if (ps->info.ps.has_pcoord) ps_input_cntl[ps_offset++] = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20); if (ps->info.ps.num_input_clips_culls) { - single_slot_to_ps_input(outinfo, VARYING_SLOT_CLIP_DIST0, ps_input_cntl, &ps_offset, true, - false, false, false); + single_slot_to_ps_input(outinfo, VARYING_SLOT_CLIP_DIST0, ps_input_cntl, &ps_offset, true, false, false, false); if (ps->info.ps.num_input_clips_culls > 4) - single_slot_to_ps_input(outinfo, VARYING_SLOT_CLIP_DIST1, ps_input_cntl, &ps_offset, true, - false, false, false); + single_slot_to_ps_input(outinfo, VARYING_SLOT_CLIP_DIST1, ps_input_cntl, &ps_offset, true, false, false, + false); } input_mask_to_ps_inputs(outinfo, ps, ps->info.ps.input_mask, ps_input_cntl, &ps_offset, false); @@ -3502,19 +3328,16 @@ radv_emit_ps_inputs(const struct radv_device *device, struct radeon_cmdbuf *ctx_ /* Per-primitive PS inputs: the HW needs these to be last. */ if (ps->info.ps.prim_id_input && mesh) - single_slot_to_ps_input(outinfo, VARYING_SLOT_PRIMITIVE_ID, ps_input_cntl, &ps_offset, true, - false, false, gfx11plus); + single_slot_to_ps_input(outinfo, VARYING_SLOT_PRIMITIVE_ID, ps_input_cntl, &ps_offset, true, false, false, + gfx11plus); if (ps->info.ps.layer_input && mesh) - single_slot_to_ps_input(outinfo, VARYING_SLOT_LAYER, ps_input_cntl, &ps_offset, false, true, - false, gfx11plus); + single_slot_to_ps_input(outinfo, VARYING_SLOT_LAYER, ps_input_cntl, &ps_offset, false, true, false, gfx11plus); if (ps->info.ps.viewport_index_input && mesh) - single_slot_to_ps_input(outinfo, VARYING_SLOT_VIEWPORT, ps_input_cntl, &ps_offset, false, - true, false, gfx11plus); + single_slot_to_ps_input(outinfo, VARYING_SLOT_VIEWPORT, ps_input_cntl, &ps_offset, false, true, false, gfx11plus); - input_mask_to_ps_inputs(outinfo, ps, ps->info.ps.input_per_primitive_mask, ps_input_cntl, - &ps_offset, gfx11plus); + input_mask_to_ps_inputs(outinfo, ps, ps->info.ps.input_per_primitive_mask, ps_input_cntl, &ps_offset, gfx11plus); if (ps_offset) { radeon_set_context_reg_seq(ctx_cs, R_028644_SPI_PS_INPUT_CNTL_0, ps_offset); for (unsigned i = 0; i < ps_offset; i++) { @@ -3524,8 +3347,8 @@ radv_emit_ps_inputs(const struct radv_device *device, struct radeon_cmdbuf *ctx_ } static void -radv_emit_fragment_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, - struct radeon_cmdbuf *cs, const struct radv_shader *ps) +radv_emit_fragment_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, + const struct radv_shader *ps) { const struct radv_physical_device *pdevice = device->physical_device; bool param_gen; @@ -3544,19 +3367,16 @@ radv_emit_fragment_shader(const struct radv_device *device, struct radeon_cmdbuf radeon_emit(ctx_cs, ps->config.spi_ps_input_addr); /* Workaround when there are no PS inputs but LDS is used. 
*/ - param_gen = - pdevice->rad_info.gfx_level >= GFX11 && !ps->info.ps.num_interp && ps->config.lds_size; + param_gen = pdevice->rad_info.gfx_level >= GFX11 && !ps->info.ps.num_interp && ps->config.lds_size; radeon_set_context_reg(ctx_cs, R_0286D8_SPI_PS_IN_CONTROL, S_0286D8_NUM_INTERP(ps->info.ps.num_interp) | S_0286D8_NUM_PRIM_INTERP(ps->info.ps.num_prim_interp) | - S_0286D8_PS_W32_EN(ps->info.wave_size == 32) | - S_0286D8_PARAM_GEN(param_gen)); + S_0286D8_PS_W32_EN(ps->info.wave_size == 32) | S_0286D8_PARAM_GEN(param_gen)); - radeon_set_context_reg( - ctx_cs, R_028710_SPI_SHADER_Z_FORMAT, - ac_get_spi_shader_z_format(ps->info.ps.writes_z, ps->info.ps.writes_stencil, - ps->info.ps.writes_sample_mask, ps->info.ps.writes_mrt0_alpha)); + radeon_set_context_reg(ctx_cs, R_028710_SPI_SHADER_Z_FORMAT, + ac_get_spi_shader_z_format(ps->info.ps.writes_z, ps->info.ps.writes_stencil, + ps->info.ps.writes_sample_mask, ps->info.ps.writes_mrt0_alpha)); } static void @@ -3574,13 +3394,11 @@ radv_pipeline_emit_vgt_vertex_reuse(const struct radv_device *device, struct rad TESS_SPACING_FRACTIONAL_ODD) { vtx_reuse_depth = 14; } - radeon_set_context_reg(ctx_cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, - S_028C58_VTX_REUSE_DEPTH(vtx_reuse_depth)); + radeon_set_context_reg(ctx_cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, S_028C58_VTX_REUSE_DEPTH(vtx_reuse_depth)); } static struct radv_vgt_shader_key -radv_pipeline_generate_vgt_shader_key(const struct radv_device *device, - const struct radv_graphics_pipeline *pipeline) +radv_pipeline_generate_vgt_shader_key(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline) { uint8_t hs_size = 64, gs_size = 64, vs_size = 64; struct radv_vgt_shader_key key; @@ -3615,8 +3433,7 @@ radv_pipeline_generate_vgt_shader_key(const struct radv_device *device, key.streamout = !!pipeline->streamout_shader; if (radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) { key.mesh = 1; - key.mesh_scratch_ring = - pipeline->base.shaders[MESA_SHADER_MESH]->info.ms.needs_ms_scratch_ring; + key.mesh_scratch_ring = pipeline->base.shaders[MESA_SHADER_MESH]->info.ms.needs_ms_scratch_ring; } key.hs_wave32 = hs_size == 32; @@ -3646,18 +3463,15 @@ radv_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdb stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1); } else if (key->mesh) { assert(!key->ngg_passthrough); - stages |= S_028B54_GS_EN(1) | S_028B54_GS_FAST_LAUNCH(1) | - S_028B54_NGG_WAVE_ID_EN(key->mesh_scratch_ring); + stages |= S_028B54_GS_EN(1) | S_028B54_GS_FAST_LAUNCH(1) | S_028B54_NGG_WAVE_ID_EN(key->mesh_scratch_ring); } else if (key->ngg) { stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL); } if (key->ngg) { - stages |= S_028B54_PRIMGEN_EN(1) | - S_028B54_NGG_WAVE_ID_EN(key->streamout) | + stages |= S_028B54_PRIMGEN_EN(1) | S_028B54_NGG_WAVE_ID_EN(key->streamout) | S_028B54_PRIMGEN_PASSTHRU_EN(key->ngg_passthrough) | - S_028B54_PRIMGEN_PASSTHRU_NO_MSG(key->ngg_passthrough && - pdevice->rad_info.family >= CHIP_NAVI23); + S_028B54_PRIMGEN_PASSTHRU_NO_MSG(key->ngg_passthrough && pdevice->rad_info.family >= CHIP_NAVI23); } else if (key->gs) { stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); } @@ -3666,8 +3480,7 @@ radv_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdb stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2); if (pdevice->rad_info.gfx_level >= GFX10) { - stages |= S_028B54_HS_W32_EN(key->hs_wave32) | - S_028B54_GS_W32_EN(key->gs_wave32) | + stages |= S_028B54_HS_W32_EN(key->hs_wave32) | 
S_028B54_GS_W32_EN(key->gs_wave32) | S_028B54_VS_W32_EN(pdevice->rad_info.gfx_level < GFX11 && key->vs_wave32); /* Legacy GS only supports Wave64. Read it as an implication. */ assert(!(key->gs && !key->ngg) || !key->gs_wave32); @@ -3678,8 +3491,7 @@ radv_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdb static void radv_pipeline_emit_vgt_gs_out(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, - const struct radv_graphics_pipeline *pipeline, - uint32_t vgt_gs_out_prim_type) + const struct radv_graphics_pipeline *pipeline, uint32_t vgt_gs_out_prim_type) { const struct radv_physical_device *pdevice = device->physical_device; @@ -3691,8 +3503,7 @@ radv_pipeline_emit_vgt_gs_out(const struct radv_device *device, struct radeon_cm } static void -gfx103_pipeline_emit_vgt_draw_payload_cntl(struct radeon_cmdbuf *ctx_cs, - const struct radv_graphics_pipeline *pipeline, +gfx103_pipeline_emit_vgt_draw_payload_cntl(struct radeon_cmdbuf *ctx_cs, const struct radv_graphics_pipeline *pipeline, const struct vk_graphics_pipeline_state *state) { const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline); @@ -3702,18 +3513,16 @@ gfx103_pipeline_emit_vgt_draw_payload_cntl(struct radeon_cmdbuf *ctx_cs, /* Enables the second channel of the primitive export instruction. * This channel contains: VRS rate x, y, viewport and layer. */ - bool enable_prim_payload = outinfo && (outinfo->writes_viewport_index_per_primitive || - outinfo->writes_layer_per_primitive || - outinfo->writes_primitive_shading_rate_per_primitive); + bool enable_prim_payload = + outinfo && (outinfo->writes_viewport_index_per_primitive || outinfo->writes_layer_per_primitive || + outinfo->writes_primitive_shading_rate_per_primitive); - radeon_set_context_reg( - ctx_cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL, - S_028A98_EN_VRS_RATE(enable_vrs) | S_028A98_EN_PRIM_PAYLOAD(enable_prim_payload)); + radeon_set_context_reg(ctx_cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL, + S_028A98_EN_VRS_RATE(enable_vrs) | S_028A98_EN_PRIM_PAYLOAD(enable_prim_payload)); } static bool -gfx103_pipeline_vrs_coarse_shading(const struct radv_device *device, - const struct radv_graphics_pipeline *pipeline) +gfx103_pipeline_vrs_coarse_shading(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline) { struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; @@ -3751,24 +3560,21 @@ gfx103_pipeline_emit_vrs_state(const struct radv_device *device, struct radeon_c * requested by the user. Note that vkd3d-proton always has to declare VRS as dynamic because * in DX12 it's fully dynamic. */ - radeon_set_context_reg( - ctx_cs, R_028848_PA_CL_VRS_CNTL, - S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE) | - S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE)); + radeon_set_context_reg(ctx_cs, R_028848_PA_CL_VRS_CNTL, + S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE) | + S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE)); /* If the shader is using discard, turn off coarse shading because discard at 2x2 pixel * granularity degrades quality too much. MIN allows sample shading but not coarse shading. */ struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; - mode = ps->info.ps.can_discard ? V_028064_SC_VRS_COMB_MODE_MIN - : V_028064_SC_VRS_COMB_MODE_PASSTHRU; + mode = ps->info.ps.can_discard ? 
V_028064_SC_VRS_COMB_MODE_MIN : V_028064_SC_VRS_COMB_MODE_PASSTHRU; } if (pdevice->rad_info.gfx_level < GFX11) { radeon_set_context_reg(ctx_cs, R_028064_DB_VRS_OVERRIDE_CNTL, - S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) | - S_028064_VRS_OVERRIDE_RATE_X(rate_x) | + S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) | S_028064_VRS_OVERRIDE_RATE_X(rate_x) | S_028064_VRS_OVERRIDE_RATE_Y(rate_y)); } } @@ -3790,8 +3596,7 @@ radv_pipeline_emit_pm4(const struct radv_device *device, struct radv_graphics_pi cs->buf = malloc(4 * (cs->max_dw + ctx_cs->max_dw)); ctx_cs->buf = cs->buf + cs->max_dw; - struct radv_vgt_shader_key vgt_shader_key = - radv_pipeline_generate_vgt_shader_key(device, pipeline); + struct radv_vgt_shader_key vgt_shader_key = radv_pipeline_generate_vgt_shader_key(device, pipeline); radv_pipeline_emit_blend_state(ctx_cs, pipeline, blend); radv_pipeline_emit_vgt_gs_mode(device, ctx_cs, pipeline); @@ -3808,16 +3613,13 @@ radv_pipeline_emit_pm4(const struct radv_device *device, struct radv_graphics_pi radv_emit_tess_ctrl_shader(device, cs, pipeline->base.shaders[MESA_SHADER_TESS_CTRL]); if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) { - radv_emit_tess_eval_shader(device, ctx_cs, cs, - pipeline->base.shaders[MESA_SHADER_TESS_EVAL]); + radv_emit_tess_eval_shader(device, ctx_cs, cs, pipeline->base.shaders[MESA_SHADER_TESS_EVAL]); } - if (pdevice->rad_info.gfx_level >= GFX10 && - !radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY) && + if (pdevice->rad_info.gfx_level >= GFX10 && !radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY) && !radv_pipeline_has_ngg(pipeline)) { radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, - S_028A44_ES_VERTS_PER_SUBGRP(250) | - S_028A44_GS_PRIMS_PER_SUBGRP(126) | + S_028A44_ES_VERTS_PER_SUBGRP(250) | S_028A44_GS_PRIMS_PER_SUBGRP(126) | S_028A44_GS_INST_PRIMS_IN_SUBGRP(126)); } } @@ -3850,13 +3652,11 @@ radv_pipeline_emit_pm4(const struct radv_device *device, struct radv_graphics_pi } static void -radv_pipeline_init_vertex_input_state(const struct radv_device *device, - struct radv_graphics_pipeline *pipeline, +radv_pipeline_init_vertex_input_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline, const struct vk_graphics_pipeline_state *state) { const struct radv_physical_device *pdevice = device->physical_device; - const struct radv_shader_info *vs_info = - &radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX)->info; + const struct radv_shader_info *vs_info = &radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX)->info; if (state->vi) { u_foreach_bit (i, state->vi->attributes_valid) { @@ -3881,8 +3681,7 @@ radv_pipeline_init_vertex_input_state(const struct radv_device *device, if (vs_info->vs.has_prolog && !(pipeline->dynamic_states & RADV_DYNAMIC_VERTEX_INPUT)) { const enum amd_gfx_level gfx_level = pdevice->rad_info.gfx_level; const enum radeon_family family = pdevice->rad_info.family; - const struct ac_vtx_format_info *vtx_info_table = - ac_get_vtx_format_info_table(gfx_level, family); + const struct ac_vtx_format_info *vtx_info_table = ac_get_vtx_format_info_table(gfx_level, family); pipeline->vs_input_state.bindings_match_attrib = true; @@ -3911,8 +3710,7 @@ radv_pipeline_init_vertex_input_state(const struct radv_device *device, const struct ac_vtx_format_info *vtx_info = &vtx_info_table[format]; pipeline->vs_input_state.formats[i] = format; - uint8_t align_req_minus_1 = - vtx_info->chan_byte_size >= 4 ? 
3 : (vtx_info->element_size - 1); + uint8_t align_req_minus_1 = vtx_info->chan_byte_size >= 4 ? 3 : (vtx_info->element_size - 1); pipeline->vs_input_state.format_align_req_minus_1[i] = align_req_minus_1; pipeline->vs_input_state.format_sizes[i] = vtx_info->element_size; pipeline->vs_input_state.alpha_adjust_lo |= (vtx_info->alpha_adjust & 0x1) << i; @@ -3943,8 +3741,7 @@ radv_pipeline_get_streamout_shader(struct radv_graphics_pipeline *pipeline) return NULL; } static void -radv_pipeline_init_shader_stages_state(const struct radv_device *device, - struct radv_graphics_pipeline *pipeline) +radv_pipeline_init_shader_stages_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline) { for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) { bool shader_exists = !!pipeline->base.shaders[i]; @@ -3959,41 +3756,34 @@ radv_pipeline_init_shader_stages_state(const struct radv_device *device, radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH) ? MESA_SHADER_MESH : MESA_SHADER_VERTEX; const struct radv_shader *shader = radv_get_shader(pipeline->base.shaders, first_stage); - const struct radv_userdata_info *loc = - radv_get_user_sgpr(shader, AC_UD_VS_BASE_VERTEX_START_INSTANCE); + const struct radv_userdata_info *loc = radv_get_user_sgpr(shader, AC_UD_VS_BASE_VERTEX_START_INSTANCE); if (loc->sgpr_idx != -1) { pipeline->vtx_base_sgpr = shader->info.user_data_0; pipeline->vtx_base_sgpr += loc->sgpr_idx * 4; pipeline->vtx_emit_num = loc->num_sgprs; - pipeline->uses_drawid = - radv_get_shader(pipeline->base.shaders, first_stage)->info.vs.needs_draw_id; - pipeline->uses_baseinstance = - radv_get_shader(pipeline->base.shaders, first_stage)->info.vs.needs_base_instance; + pipeline->uses_drawid = radv_get_shader(pipeline->base.shaders, first_stage)->info.vs.needs_draw_id; + pipeline->uses_baseinstance = radv_get_shader(pipeline->base.shaders, first_stage)->info.vs.needs_base_instance; assert(first_stage != MESA_SHADER_MESH || !pipeline->uses_baseinstance); } } static uint32_t -radv_pipeline_init_vgt_gs_out(struct radv_graphics_pipeline *pipeline, - const struct vk_graphics_pipeline_state *state) +radv_pipeline_init_vgt_gs_out(struct radv_graphics_pipeline *pipeline, const struct vk_graphics_pipeline_state *state) { uint32_t gs_out; if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) { - gs_out = si_conv_gl_prim_to_gs_out( - pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.gs.output_prim); + gs_out = si_conv_gl_prim_to_gs_out(pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.gs.output_prim); } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) { if (pipeline->base.shaders[MESA_SHADER_TESS_EVAL]->info.tes.point_mode) { gs_out = V_028A6C_POINTLIST; } else { - gs_out = si_conv_tess_prim_to_gs_out( - pipeline->base.shaders[MESA_SHADER_TESS_EVAL]->info.tes._primitive_mode); + gs_out = si_conv_tess_prim_to_gs_out(pipeline->base.shaders[MESA_SHADER_TESS_EVAL]->info.tes._primitive_mode); } } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) { - gs_out = - si_conv_gl_prim_to_gs_out(pipeline->base.shaders[MESA_SHADER_MESH]->info.ms.output_prim); + gs_out = si_conv_gl_prim_to_gs_out(pipeline->base.shaders[MESA_SHADER_MESH]->info.ms.output_prim); } else { gs_out = si_conv_prim_to_gs_out(si_translate_prim(state->ia->primitive_topology), false); } @@ -4003,10 +3793,8 @@ radv_pipeline_init_vgt_gs_out(struct radv_graphics_pipeline *pipeline, static void radv_pipeline_init_extra(struct radv_graphics_pipeline *pipeline, - const struct 
radv_graphics_pipeline_create_info *extra, - struct radv_blend_state *blend_state, - const struct vk_graphics_pipeline_state *state, - uint32_t *vgt_gs_out_prim_type) + const struct radv_graphics_pipeline_create_info *extra, struct radv_blend_state *blend_state, + const struct vk_graphics_pipeline_state *state, uint32_t *vgt_gs_out_prim_type) { if (extra->custom_blend_mode == V_028808_CB_ELIMINATE_FAST_CLEAR || extra->custom_blend_mode == V_028808_CB_FMASK_DECOMPRESS || @@ -4036,8 +3824,7 @@ radv_pipeline_init_extra(struct radv_graphics_pipeline *pipeline, pipeline->db_render_control |= S_028000_STENCIL_CLEAR_ENABLE(extra->db_stencil_clear); pipeline->db_render_control |= S_028000_RESUMMARIZE_ENABLE(extra->resummarize_enable); pipeline->db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(extra->depth_compress_disable); - pipeline->db_render_control |= - S_028000_STENCIL_COMPRESS_DISABLE(extra->stencil_compress_disable); + pipeline->db_render_control |= S_028000_STENCIL_COMPRESS_DISABLE(extra->stencil_compress_disable); } } @@ -4086,8 +3873,7 @@ radv_needs_null_export_workaround(const struct radv_device *device, const struct static VkResult radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv_device *device, - struct vk_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, + struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct radv_graphics_pipeline_create_info *extra) { VkGraphicsPipelineLibraryFlagBitsEXT needed_lib_flags = ALL_GRAPHICS_LIB_FLAGS; @@ -4105,13 +3891,11 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv /* If we have libraries, import them first. */ if (libs_info) { - const bool link_optimize = - (pCreateInfo->flags & VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT) != 0; + const bool link_optimize = (pCreateInfo->flags & VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT) != 0; for (uint32_t i = 0; i < libs_info->libraryCount; i++) { RADV_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]); - struct radv_graphics_lib_pipeline *gfx_pipeline_lib = - radv_pipeline_to_graphics_lib(pipeline_lib); + struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib); assert(pipeline_lib->type == RADV_PIPELINE_GRAPHICS_LIB); @@ -4120,16 +3904,15 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv */ assert(!link_optimize || gfx_pipeline_lib->base.retain_shaders); - radv_graphics_pipeline_import_lib(device, pipeline, &state, &pipeline_layout, - gfx_pipeline_lib, link_optimize); + radv_graphics_pipeline_import_lib(device, pipeline, &state, &pipeline_layout, gfx_pipeline_lib, link_optimize); needed_lib_flags &= ~gfx_pipeline_lib->lib_flags; } } /* Import graphics pipeline info that was not included in the libraries. 
*/ - result = radv_pipeline_import_graphics_info(device, pipeline, &state, &pipeline_layout, - pCreateInfo, needed_lib_flags); + result = + radv_pipeline_import_graphics_info(device, pipeline, &state, &pipeline_layout, pCreateInfo, needed_lib_flags); if (result != VK_SUCCESS) { radv_pipeline_layout_finish(device, &pipeline_layout); return result; @@ -4138,13 +3921,12 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv if (!fast_linking_enabled) radv_pipeline_layout_hash(&pipeline_layout); - if (!radv_skip_graphics_pipeline_compile(device, pipeline, needed_lib_flags, - fast_linking_enabled)) { - struct radv_pipeline_key key = radv_generate_graphics_pipeline_key( - device, pipeline, pCreateInfo, &state, needed_lib_flags); + if (!radv_skip_graphics_pipeline_compile(device, pipeline, needed_lib_flags, fast_linking_enabled)) { + struct radv_pipeline_key key = + radv_generate_graphics_pipeline_key(device, pipeline, pCreateInfo, &state, needed_lib_flags); - result = radv_graphics_pipeline_compile(pipeline, pCreateInfo, &pipeline_layout, device, - cache, &key, needed_lib_flags, fast_linking_enabled); + result = radv_graphics_pipeline_compile(pipeline, pCreateInfo, &pipeline_layout, device, cache, &key, + needed_lib_flags, fast_linking_enabled); if (result != VK_SUCCESS) { radv_pipeline_layout_finish(device, &pipeline_layout); return result; @@ -4153,8 +3935,7 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv uint32_t vgt_gs_out_prim_type = radv_pipeline_init_vgt_gs_out(pipeline, &state); - radv_pipeline_init_multisample_state(device, pipeline, pCreateInfo, &state, - vgt_gs_out_prim_type); + radv_pipeline_init_multisample_state(device, pipeline, pCreateInfo, &state, vgt_gs_out_prim_type); if (!radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) radv_pipeline_init_input_assembly_state(device, pipeline); @@ -4179,8 +3960,7 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv } unsigned custom_blend_mode = extra ? 
extra->custom_blend_mode : 0; - if (radv_needs_null_export_workaround(device, ps, custom_blend_mode) && - !blend.spi_shader_col_format) { + if (radv_needs_null_export_workaround(device, ps, custom_blend_mode) && !blend.spi_shader_col_format) { blend.spi_shader_col_format = V_028714_SPI_SHADER_32_R; pipeline->col_format_non_compacted = V_028714_SPI_SHADER_32_R; } @@ -4196,13 +3976,10 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv pipeline->is_ngg = radv_pipeline_has_ngg(pipeline); pipeline->has_ngg_culling = - pipeline->is_ngg && - pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.has_ngg_culling; - pipeline->force_vrs_per_vertex = - pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.force_vrs_per_vertex; + pipeline->is_ngg && pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.has_ngg_culling; + pipeline->force_vrs_per_vertex = pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.force_vrs_per_vertex; pipeline->rast_prim = vgt_gs_out_prim_type; - pipeline->uses_out_of_order_rast = - state.rs->rasterization_order_amd == VK_RASTERIZATION_ORDER_RELAXED_AMD; + pipeline->uses_out_of_order_rast = state.rs->rasterization_order_amd == VK_RASTERIZATION_ORDER_RELAXED_AMD; pipeline->uses_vrs_attachment = radv_pipeline_uses_vrs_attachment(pCreateInfo, &state); pipeline->base.push_constant_size = pipeline_layout.push_constant_size; @@ -4232,8 +4009,7 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv } VkResult -radv_graphics_pipeline_create(VkDevice _device, VkPipelineCache _cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, +radv_graphics_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct radv_graphics_pipeline_create_info *extra, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline) { @@ -4242,8 +4018,7 @@ radv_graphics_pipeline_create(VkDevice _device, VkPipelineCache _cache, struct radv_graphics_pipeline *pipeline; VkResult result; - pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pipeline == NULL) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -4257,8 +4032,7 @@ radv_graphics_pipeline_create(VkDevice _device, VkPipelineCache _cache, } *pPipeline = radv_pipeline_to_handle(&pipeline->base); - radv_rmv_log_graphics_pipeline_create(device, pCreateInfo->flags, &pipeline->base, - pipeline->base.is_internal); + radv_rmv_log_graphics_pipeline_create(device, pCreateInfo->flags, &pipeline->base, pipeline->base.is_internal); return VK_SUCCESS; } @@ -4279,9 +4053,8 @@ radv_destroy_graphics_pipeline(struct radv_device *device, struct radv_graphics_ } static VkResult -radv_graphics_lib_pipeline_init(struct radv_graphics_lib_pipeline *pipeline, - struct radv_device *device, struct vk_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo) +radv_graphics_lib_pipeline_init(struct radv_graphics_lib_pipeline *pipeline, struct radv_device *device, + struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo) { VkResult result; @@ -4304,16 +4077,14 @@ radv_graphics_lib_pipeline_init(struct radv_graphics_lib_pipeline *pipeline, /* If we have libraries, import them first. 
*/ if (libs_info) { - const bool link_optimize = - (pCreateInfo->flags & VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT) != 0; + const bool link_optimize = (pCreateInfo->flags & VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT) != 0; for (uint32_t i = 0; i < libs_info->libraryCount; i++) { RADV_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]); - struct radv_graphics_lib_pipeline *gfx_pipeline_lib = - radv_pipeline_to_graphics_lib(pipeline_lib); + struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib); - radv_graphics_pipeline_import_lib(device, &pipeline->base, state, pipeline_layout, - gfx_pipeline_lib, link_optimize); + radv_graphics_pipeline_import_lib(device, &pipeline->base, state, pipeline_layout, gfx_pipeline_lib, + link_optimize); pipeline->lib_flags |= gfx_pipeline_lib->lib_flags; @@ -4321,19 +4092,19 @@ radv_graphics_lib_pipeline_init(struct radv_graphics_lib_pipeline *pipeline, } } - result = radv_pipeline_import_graphics_info(device, &pipeline->base, state, pipeline_layout, - pCreateInfo, needed_lib_flags); + result = radv_pipeline_import_graphics_info(device, &pipeline->base, state, pipeline_layout, pCreateInfo, + needed_lib_flags); if (result != VK_SUCCESS) return result; if (!fast_linking_enabled) radv_pipeline_layout_hash(pipeline_layout); - struct radv_pipeline_key key = radv_generate_graphics_pipeline_key( - device, &pipeline->base, pCreateInfo, state, needed_lib_flags); + struct radv_pipeline_key key = + radv_generate_graphics_pipeline_key(device, &pipeline->base, pCreateInfo, state, needed_lib_flags); - return radv_graphics_pipeline_compile(&pipeline->base, pCreateInfo, pipeline_layout, device, - cache, &key, needed_lib_flags, fast_linking_enabled); + return radv_graphics_pipeline_compile(&pipeline->base, pCreateInfo, pipeline_layout, device, cache, &key, + needed_lib_flags, fast_linking_enabled); } static VkResult @@ -4346,8 +4117,7 @@ radv_graphics_lib_pipeline_create(VkDevice _device, VkPipelineCache _cache, struct radv_graphics_lib_pipeline *pipeline; VkResult result; - pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pipeline == NULL) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -4367,8 +4137,7 @@ radv_graphics_lib_pipeline_create(VkDevice _device, VkPipelineCache _cache, } void -radv_destroy_graphics_lib_pipeline(struct radv_device *device, - struct radv_graphics_lib_pipeline *pipeline) +radv_destroy_graphics_lib_pipeline(struct radv_device *device, struct radv_graphics_lib_pipeline *pipeline) { radv_pipeline_layout_finish(device, &pipeline->layout); @@ -4383,8 +4152,8 @@ radv_destroy_graphics_lib_pipeline(struct radv_device *device, VKAPI_ATTR VkResult VKAPI_CALL radv_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count, - const VkGraphicsPipelineCreateInfo *pCreateInfos, - const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines) + const VkGraphicsPipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipelines) { VkResult result = VK_SUCCESS; unsigned i = 0; @@ -4392,11 +4161,9 @@ radv_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, ui for (; i < count; i++) { VkResult r; if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) { - r = radv_graphics_lib_pipeline_create(_device, pipelineCache, 
&pCreateInfos[i], pAllocator, - &pPipelines[i]); + r = radv_graphics_lib_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, &pPipelines[i]); } else { - r = radv_graphics_pipeline_create(_device, pipelineCache, &pCreateInfos[i], NULL, - pAllocator, &pPipelines[i]); + r = radv_graphics_pipeline_create(_device, pipelineCache, &pCreateInfos[i], NULL, pAllocator, &pPipelines[i]); } if (r != VK_SUCCESS) { result = r; diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c index 94fb4bc..8e15f45 100644 --- a/src/amd/vulkan/radv_pipeline_rt.c +++ b/src/amd/vulkan/radv_pipeline_rt.c @@ -34,8 +34,8 @@ struct rt_handle_hash_entry { }; static uint32_t -handle_from_stages(struct radv_device *device, struct radv_ray_tracing_stage *stages, - unsigned stage_count, bool replay_namespace) +handle_from_stages(struct radv_device *device, struct radv_ray_tracing_stage *stages, unsigned stage_count, + bool replay_namespace) { struct mesa_sha1 ctx; _mesa_sha1_init(&ctx); @@ -83,13 +83,10 @@ handle_from_stages(struct radv_device *device, struct radv_ray_tracing_stage *st } static VkResult -radv_create_group_handles(struct radv_device *device, - const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, - struct radv_ray_tracing_stage *stages, - struct radv_ray_tracing_group *groups) +radv_create_group_handles(struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, + struct radv_ray_tracing_stage *stages, struct radv_ray_tracing_group *groups) { - bool capture_replay = pCreateInfo->flags & - VK_PIPELINE_CREATE_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR; + bool capture_replay = pCreateInfo->flags & VK_PIPELINE_CREATE_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR; for (unsigned i = 0; i < pCreateInfo->groupCount; ++i) { const VkRayTracingShaderGroupCreateInfoKHR *group_info = &pCreateInfo->pGroups[i]; switch (group_info->type) { @@ -113,8 +110,7 @@ radv_create_group_handles(struct radv_device *device, if (group_info->anyHitShader != VK_SHADER_UNUSED_KHR) temp_stages[cnt++] = stages[group_info->anyHitShader]; - groups[i].handle.intersection_index = - handle_from_stages(device, temp_stages, cnt, capture_replay); + groups[i].handle.intersection_index = handle_from_stages(device, temp_stages, cnt, capture_replay); } break; case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: @@ -133,8 +129,7 @@ radv_create_group_handles(struct radv_device *device, if (capture_replay) { if (group_info->pShaderGroupCaptureReplayHandle && - memcmp(group_info->pShaderGroupCaptureReplayHandle, &groups[i].handle, - sizeof(groups[i].handle)) != 0) { + memcmp(group_info->pShaderGroupCaptureReplayHandle, &groups[i].handle, sizeof(groups[i].handle)) != 0) { return VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS; } } @@ -144,10 +139,8 @@ radv_create_group_handles(struct radv_device *device, } static VkResult -radv_rt_fill_group_info(struct radv_device *device, - const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, - struct radv_ray_tracing_stage *stages, - struct radv_ray_tracing_group *groups) +radv_rt_fill_group_info(struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, + struct radv_ray_tracing_stage *stages, struct radv_ray_tracing_group *groups) { VkResult result = radv_create_group_handles(device, pCreateInfo, stages, groups); @@ -167,8 +160,7 @@ radv_rt_fill_group_info(struct radv_device *device, unsigned stage_count = pCreateInfo->stageCount; for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) { 
RADV_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]); - struct radv_ray_tracing_pipeline *library_pipeline = - radv_pipeline_to_ray_tracing(pipeline); + struct radv_ray_tracing_pipeline *library_pipeline = radv_pipeline_to_ray_tracing(pipeline); for (unsigned j = 0; j < library_pipeline->group_count; ++j) { struct radv_ray_tracing_group *dst = &groups[idx + j]; @@ -191,8 +183,7 @@ radv_rt_fill_group_info(struct radv_device *device, } static void -radv_rt_fill_stage_info(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, - struct radv_ray_tracing_stage *stages) +radv_rt_fill_stage_info(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, struct radv_ray_tracing_stage *stages) { uint32_t idx; for (idx = 0; idx < pCreateInfo->stageCount; idx++) { @@ -207,8 +198,7 @@ radv_rt_fill_stage_info(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, if (pCreateInfo->pLibraryInfo) { for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) { RADV_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]); - struct radv_ray_tracing_pipeline *library_pipeline = - radv_pipeline_to_ray_tracing(pipeline); + struct radv_ray_tracing_pipeline *library_pipeline = radv_pipeline_to_ray_tracing(pipeline); for (unsigned j = 0; j < library_pipeline->stage_count; ++j) { stages[idx].shader = vk_pipeline_cache_object_ref(library_pipeline->stages[j].shader); stages[idx].stage = library_pipeline->stages[j].stage; @@ -230,8 +220,7 @@ radv_create_merged_rt_create_info(const VkRayTracingPipelineCreateInfoKHR *pCrea if (pCreateInfo->pLibraryInfo) { for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) { RADV_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]); - struct radv_ray_tracing_pipeline *library_pipeline = - radv_pipeline_to_ray_tracing(pipeline); + struct radv_ray_tracing_pipeline *library_pipeline = radv_pipeline_to_ray_tracing(pipeline); total_stages += library_pipeline->stage_count; total_groups += library_pipeline->group_count; @@ -281,15 +270,13 @@ move_rt_instructions(nir_shader *shader) } } - nir_metadata_preserve(nir_shader_get_entrypoint(shader), - nir_metadata_all & (~nir_metadata_instr_index)); + nir_metadata_preserve(nir_shader_get_entrypoint(shader), nir_metadata_all & (~nir_metadata_instr_index)); } static struct radv_shader * radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache, - const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, - const struct radv_pipeline_key *pipeline_key, struct radv_pipeline_stage *stage, - uint32_t *stack_size) + const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, const struct radv_pipeline_key *pipeline_key, + struct radv_pipeline_stage *stage, uint32_t *stack_size) { struct radv_shader_binary *binary; RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout); @@ -336,8 +323,7 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache, for (uint32_t i = 0; i < num_shaders; i++) { struct radv_pipeline_stage temp_stage = *stage; temp_stage.nir = shaders[i]; - radv_nir_lower_rt_abi(temp_stage.nir, pCreateInfo, &temp_stage.args, &stage->info, stack_size, - i > 0); + radv_nir_lower_rt_abi(temp_stage.nir, pCreateInfo, &temp_stage.args, &stage->info, stack_size, i > 0); radv_optimize_nir(temp_stage.nir, pipeline_key->optimisations_disabled); radv_postprocess_nir(device, pipeline_layout, pipeline_key, MESA_SHADER_NONE, &temp_stage); @@ -347,8 +333,8 @@ radv_rt_nir_to_asm(struct radv_device *device, 
struct vk_pipeline_cache *cache, /* Compile NIR shader to AMD assembly. */ struct radv_shader *shader; - shader = radv_shader_nir_to_asm(device, cache, stage, shaders, num_shaders, pipeline_key, - keep_executable_info, keep_statistic_info, &binary); + shader = radv_shader_nir_to_asm(device, cache, stage, shaders, num_shaders, pipeline_key, keep_executable_info, + keep_statistic_info, &binary); if (shader && keep_executable_info && stage->spirv.size) { shader->spirv = malloc(stage->spirv.size); @@ -365,8 +351,7 @@ static VkResult radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *cache, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, const VkPipelineCreationFeedbackCreateInfo *creation_feedback, - const struct radv_pipeline_key *key, - struct radv_ray_tracing_pipeline *pipeline) + const struct radv_pipeline_key *key, struct radv_ray_tracing_pipeline *pipeline) { if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) return VK_PIPELINE_COMPILE_REQUIRED; @@ -386,16 +371,15 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca if (radv_ray_tracing_stage_is_compiled(&stages[idx])) { uint32_t stack_size = 0; - struct radv_shader *shader = - radv_rt_nir_to_asm(device, cache, pCreateInfo, key, &stage, &stack_size); + struct radv_shader *shader = radv_rt_nir_to_asm(device, cache, pCreateInfo, key, &stage, &stack_size); stages[idx].stack_size = stack_size; stages[idx].shader = shader ? &shader->base : NULL; } else { uint8_t shader_sha1[SHA1_DIGEST_LENGTH]; radv_hash_shaders(shader_sha1, &stage, 1, NULL, key, radv_get_hash_flags(device, false)); stages[idx].stack_size = stage.nir->scratch_size; - stages[idx].shader = radv_pipeline_cache_nir_to_handle( - device, cache, stage.nir, shader_sha1, !key->optimisations_disabled); + stages[idx].shader = + radv_pipeline_cache_nir_to_handle(device, cache, stage.nir, shader_sha1, !key->optimisations_disabled); } ralloc_free(stage.nir); @@ -442,8 +426,7 @@ radv_rt_pipeline_has_dynamic_stack_size(const VkRayTracingPipelineCreateInfoKHR return false; for (unsigned i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; ++i) { - if (pCreateInfo->pDynamicState->pDynamicStates[i] == - VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR) + if (pCreateInfo->pDynamicState->pDynamicStates[i] == VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR) return true; } @@ -451,8 +434,7 @@ radv_rt_pipeline_has_dynamic_stack_size(const VkRayTracingPipelineCreateInfoKHR } static void -compute_rt_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, - struct radv_ray_tracing_pipeline *pipeline) +compute_rt_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, struct radv_ray_tracing_pipeline *pipeline) { if (radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo)) { pipeline->stack_size = -1u; @@ -490,10 +472,8 @@ compute_rt_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, } pipeline->stack_size = raygen_size + - MIN2(pCreateInfo->maxPipelineRayRecursionDepth, 1) * - MAX2(chit_miss_size, intersection_size + any_hit_size) + - MAX2(0, (int)(pCreateInfo->maxPipelineRayRecursionDepth) - 1) * chit_miss_size + - 2 * callable_size; + MIN2(pCreateInfo->maxPipelineRayRecursionDepth, 1) * MAX2(chit_miss_size, intersection_size + any_hit_size) + + MAX2(0, (int)(pCreateInfo->maxPipelineRayRecursionDepth) - 1) * chit_miss_size + 2 * callable_size; } static void @@ -505,25 +485,21 @@ combine_config(struct ac_shader_config *config, struct ac_shader_config *other) 
config->spilled_sgprs = MAX2(config->spilled_sgprs, other->spilled_sgprs); config->spilled_vgprs = MAX2(config->spilled_vgprs, other->spilled_vgprs); config->lds_size = MAX2(config->lds_size, other->lds_size); - config->scratch_bytes_per_wave = - MAX2(config->scratch_bytes_per_wave, other->scratch_bytes_per_wave); + config->scratch_bytes_per_wave = MAX2(config->scratch_bytes_per_wave, other->scratch_bytes_per_wave); assert(config->float_mode == other->float_mode); } static void -postprocess_rt_config(struct ac_shader_config *config, enum amd_gfx_level gfx_level, - unsigned wave_size) +postprocess_rt_config(struct ac_shader_config *config, enum amd_gfx_level gfx_level, unsigned wave_size) { - config->rsrc1 = (config->rsrc1 & C_00B848_VGPRS) | - S_00B848_VGPRS((config->num_vgprs - 1) / (wave_size == 32 ? 8 : 4)); + config->rsrc1 = + (config->rsrc1 & C_00B848_VGPRS) | S_00B848_VGPRS((config->num_vgprs - 1) / (wave_size == 32 ? 8 : 4)); if (gfx_level < GFX10) - config->rsrc1 = - (config->rsrc1 & C_00B848_SGPRS) | S_00B848_SGPRS((config->num_sgprs - 1) / 8); + config->rsrc1 = (config->rsrc1 & C_00B848_SGPRS) | S_00B848_SGPRS((config->num_sgprs - 1) / 8); config->rsrc2 = (config->rsrc2 & C_00B84C_LDS_SIZE) | S_00B84C_LDS_SIZE(config->lds_size); - config->rsrc3 = (config->rsrc3 & C_00B8A0_SHARED_VGPR_CNT) | - S_00B8A0_SHARED_VGPR_CNT(config->num_shared_vgprs / 8); + config->rsrc3 = (config->rsrc3 & C_00B8A0_SHARED_VGPR_CNT) | S_00B8A0_SHARED_VGPR_CNT(config->num_shared_vgprs / 8); } static void @@ -535,19 +511,16 @@ compile_rt_prolog(struct radv_device *device, struct radv_ray_tracing_pipeline * struct ac_shader_config *config = &pipeline->base.base.shaders[MESA_SHADER_COMPUTE]->config; for (unsigned i = 0; i < pipeline->stage_count; i++) { if (radv_ray_tracing_stage_is_compiled(&pipeline->stages[i])) { - struct radv_shader *shader = - container_of(pipeline->stages[i].shader, struct radv_shader, base); + struct radv_shader *shader = container_of(pipeline->stages[i].shader, struct radv_shader, base); combine_config(config, &shader->config); } } combine_config(config, &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]->config); - postprocess_rt_config(config, device->physical_device->rad_info.gfx_level, - device->physical_device->rt_wave_size); + postprocess_rt_config(config, device->physical_device->rad_info.gfx_level, device->physical_device->rt_wave_size); } static VkResult -radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, - const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, +radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline) { RADV_FROM_HANDLE(radv_device, device, _device); @@ -563,15 +536,13 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, int64_t pipeline_start = os_time_get_nano(); - VkRayTracingPipelineCreateInfoKHR local_create_info = - radv_create_merged_rt_create_info(pCreateInfo); + VkRayTracingPipelineCreateInfoKHR local_create_info = radv_create_merged_rt_create_info(pCreateInfo); VK_MULTIALLOC(ma); VK_MULTIALLOC_DECL(&ma, struct radv_ray_tracing_pipeline, pipeline, 1); VK_MULTIALLOC_DECL(&ma, struct radv_ray_tracing_stage, stages, local_create_info.stageCount); VK_MULTIALLOC_DECL(&ma, struct radv_ray_tracing_group, groups, local_create_info.groupCount); - if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) + if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, 
pAllocator, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) return VK_ERROR_OUT_OF_HOST_MEMORY; radv_pipeline_init(device, &pipeline->base.base, RADV_PIPELINE_RAY_TRACING); @@ -585,8 +556,7 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, if (result != VK_SUCCESS) goto fail; - struct radv_pipeline_key key = - radv_generate_pipeline_key(device, &pipeline->base.base, pCreateInfo->flags); + struct radv_pipeline_key key = radv_generate_pipeline_key(device, &pipeline->base.base, pCreateInfo->flags); radv_hash_rt_shaders(pipeline->sha1, pCreateInfo, &key, pipeline->groups, radv_get_hash_flags(device, keep_statistic_info)); @@ -597,8 +567,7 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, cache_hit = radv_ray_tracing_pipeline_cache_search(device, cache, pipeline, pCreateInfo); if (!cache_hit) { - result = - radv_rt_compile_shaders(device, cache, pCreateInfo, creation_feedback, &key, pipeline); + result = radv_rt_compile_shaders(device, cache, pCreateInfo, creation_feedback, &key, pipeline); if (result != VK_SUCCESS) goto fail; @@ -613,17 +582,14 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, } if (!cache_hit) - radv_ray_tracing_pipeline_cache_insert(device, cache, pipeline, pCreateInfo->stageCount, - pipeline->sha1); + radv_ray_tracing_pipeline_cache_insert(device, cache, pipeline, pCreateInfo->stageCount, pipeline->sha1); /* write shader VAs into group handles */ for (unsigned i = 0; i < pipeline->group_count; i++) { if (pipeline->groups[i].recursive_shader != VK_SHADER_UNUSED_KHR) { struct radv_shader *shader = - container_of(pipeline->stages[pipeline->groups[i].recursive_shader].shader, - struct radv_shader, base); - pipeline->groups[i].handle.recursive_shader_ptr = - shader->va | radv_get_rt_priority(shader->info.stage); + container_of(pipeline->stages[pipeline->groups[i].recursive_shader].shader, struct radv_shader, base); + pipeline->groups[i].handle.recursive_shader_ptr = shader->va | radv_get_rt_priority(shader->info.stage); } } @@ -639,8 +605,7 @@ fail: } void -radv_destroy_ray_tracing_pipeline(struct radv_device *device, - struct radv_ray_tracing_pipeline *pipeline) +radv_destroy_ray_tracing_pipeline(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline) { for (unsigned i = 0; i < pipeline->stage_count; i++) { if (pipeline->stages[i].shader) @@ -664,8 +629,7 @@ radv_CreateRayTracingPipelinesKHR(VkDevice _device, VkDeferredOperationKHR defer unsigned i = 0; for (; i < count; i++) { VkResult r; - r = radv_rt_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, - &pPipelines[i]); + r = radv_rt_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, &pPipelines[i]); if (r != VK_SUCCESS) { result = r; pPipelines[i] = VK_NULL_HANDLE; @@ -689,8 +653,8 @@ radv_CreateRayTracingPipelinesKHR(VkDevice _device, VkDeferredOperationKHR defer } VKAPI_ATTR VkResult VKAPI_CALL -radv_GetRayTracingShaderGroupHandlesKHR(VkDevice device, VkPipeline _pipeline, uint32_t firstGroup, - uint32_t groupCount, size_t dataSize, void *pData) +radv_GetRayTracingShaderGroupHandlesKHR(VkDevice device, VkPipeline _pipeline, uint32_t firstGroup, uint32_t groupCount, + size_t dataSize, void *pData) { RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); struct radv_ray_tracing_group *groups = radv_pipeline_to_ray_tracing(pipeline)->groups; @@ -701,8 +665,7 @@ radv_GetRayTracingShaderGroupHandlesKHR(VkDevice device, VkPipeline _pipeline, u memset(data, 0, groupCount * RADV_RT_HANDLE_SIZE); for (uint32_t i = 0; i < groupCount; 
++i) { - memcpy(data + i * RADV_RT_HANDLE_SIZE, &groups[firstGroup + i].handle, - sizeof(struct radv_pipeline_group_handle)); + memcpy(data + i * RADV_RT_HANDLE_SIZE, &groups[firstGroup + i].handle, sizeof(struct radv_pipeline_group_handle)); } return VK_SUCCESS; @@ -729,10 +692,8 @@ radv_GetRayTracingShaderGroupStackSizeKHR(VkDevice device, VkPipeline _pipeline, } VKAPI_ATTR VkResult VKAPI_CALL -radv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(VkDevice device, VkPipeline pipeline, - uint32_t firstGroup, uint32_t groupCount, - size_t dataSize, void *pData) +radv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(VkDevice device, VkPipeline pipeline, uint32_t firstGroup, + uint32_t groupCount, size_t dataSize, void *pData) { - return radv_GetRayTracingShaderGroupHandlesKHR(device, pipeline, firstGroup, groupCount, - dataSize, pData); + return radv_GetRayTracingShaderGroupHandlesKHR(device, pipeline, firstGroup, groupCount, dataSize, pData); } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index b802603..7dfcbbc 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -60,15 +60,15 @@ #include "vk_debug_report.h" #include "vk_device.h" #include "vk_format.h" +#include "vk_image.h" #include "vk_instance.h" #include "vk_log.h" #include "vk_physical_device.h" -#include "vk_shader_module.h" #include "vk_queue.h" +#include "vk_shader_module.h" #include "vk_util.h" -#include "vk_image.h" -#include "vk_ycbcr_conversion.h" #include "vk_video.h" +#include "vk_ycbcr_conversion.h" #include "rmv/vk_rmv_common.h" #include "rmv/vk_rmv_tokens.h" @@ -106,8 +106,7 @@ typedef uint32_t xcb_window_t; #include "wsi_common.h" #ifdef __cplusplus -extern "C" -{ +extern "C" { #endif /* Helper to determine if we should compile @@ -123,8 +122,8 @@ extern "C" #define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 0 #endif -#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || defined(VK_USE_PLATFORM_XCB_KHR) || \ - defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_DISPLAY_KHR) +#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || defined(VK_USE_PLATFORM_XCB_KHR) || defined(VK_USE_PLATFORM_XLIB_KHR) || \ + defined(VK_USE_PLATFORM_DISPLAY_KHR) #define RADV_USE_WSI_PLATFORM #endif @@ -253,14 +252,14 @@ struct rvcn_decode_buffer_s; /* A non-fatal assert. Useful for debugging. 
*/ #ifdef NDEBUG -#define radv_assert(x) \ - do { \ +#define radv_assert(x) \ + do { \ } while (0) #else -#define radv_assert(x) \ - do { \ - if (unlikely(!(x))) \ - fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \ +#define radv_assert(x) \ + do { \ + if (unlikely(!(x))) \ + fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \ } while (0) #endif @@ -430,28 +429,25 @@ bool radv_pipeline_cache_search(struct radv_device *device, struct vk_pipeline_c bool *found_in_application_cache); void radv_pipeline_cache_insert(struct radv_device *device, struct vk_pipeline_cache *cache, - struct radv_pipeline *pipeline, - struct radv_shader_part_binary *ps_epilog_binary, + struct radv_pipeline *pipeline, struct radv_shader_part_binary *ps_epilog_binary, const unsigned char *sha1); struct radv_ray_tracing_pipeline; -bool radv_ray_tracing_pipeline_cache_search(struct radv_device *device, - struct vk_pipeline_cache *cache, +bool radv_ray_tracing_pipeline_cache_search(struct radv_device *device, struct vk_pipeline_cache *cache, struct radv_ray_tracing_pipeline *pipeline, const VkRayTracingPipelineCreateInfoKHR *create_info); -void radv_ray_tracing_pipeline_cache_insert(struct radv_device *device, - struct vk_pipeline_cache *cache, - struct radv_ray_tracing_pipeline *pipeline, - unsigned num_stages, const unsigned char *sha1); - -struct vk_pipeline_cache_object *radv_pipeline_cache_search_nir(struct radv_device *device, - struct vk_pipeline_cache *cache, - const unsigned char *sha1); +void radv_ray_tracing_pipeline_cache_insert(struct radv_device *device, struct vk_pipeline_cache *cache, + struct radv_ray_tracing_pipeline *pipeline, unsigned num_stages, + const unsigned char *sha1); struct vk_pipeline_cache_object * -radv_pipeline_cache_nir_to_handle(struct radv_device *device, struct vk_pipeline_cache *cache, - struct nir_shader *nir, const unsigned char *sha1, bool cached); +radv_pipeline_cache_search_nir(struct radv_device *device, struct vk_pipeline_cache *cache, const unsigned char *sha1); + +struct vk_pipeline_cache_object *radv_pipeline_cache_nir_to_handle(struct radv_device *device, + struct vk_pipeline_cache *cache, + struct nir_shader *nir, const unsigned char *sha1, + bool cached); struct nir_shader *radv_pipeline_cache_handle_to_nir(struct radv_device *device, struct vk_pipeline_cache_object *object); @@ -465,15 +461,13 @@ enum radv_blit_ds_layout { static inline enum radv_blit_ds_layout radv_meta_blit_ds_to_type(VkImageLayout layout) { - return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_BLIT_DS_LAYOUT_TILE_DISABLE - : RADV_BLIT_DS_LAYOUT_TILE_ENABLE; + return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_BLIT_DS_LAYOUT_TILE_DISABLE : RADV_BLIT_DS_LAYOUT_TILE_ENABLE; } static inline VkImageLayout radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout) { - return ds_layout == RADV_BLIT_DS_LAYOUT_TILE_ENABLE ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL - : VK_IMAGE_LAYOUT_GENERAL; + return ds_layout == RADV_BLIT_DS_LAYOUT_TILE_ENABLE ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL; } enum radv_meta_dst_layout { @@ -485,15 +479,13 @@ enum radv_meta_dst_layout { static inline enum radv_meta_dst_layout radv_meta_dst_layout_from_layout(VkImageLayout layout) { - return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_META_DST_LAYOUT_GENERAL - : RADV_META_DST_LAYOUT_OPTIMAL; + return (layout == VK_IMAGE_LAYOUT_GENERAL) ? 
RADV_META_DST_LAYOUT_GENERAL : RADV_META_DST_LAYOUT_OPTIMAL; } static inline VkImageLayout radv_meta_dst_layout_to_layout(enum radv_meta_dst_layout layout) { - return layout == RADV_META_DST_LAYOUT_OPTIMAL ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL - : VK_IMAGE_LAYOUT_GENERAL; + return layout == RADV_META_DST_LAYOUT_OPTIMAL ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL; } struct radv_meta_state { @@ -768,8 +760,7 @@ struct radv_meta_state { static inline enum radv_queue_family vk_queue_to_radv(const struct radv_physical_device *phys_dev, int queue_family_index) { - if (queue_family_index == VK_QUEUE_FAMILY_EXTERNAL || - queue_family_index == VK_QUEUE_FAMILY_FOREIGN_EXT) + if (queue_family_index == VK_QUEUE_FAMILY_EXTERNAL || queue_family_index == VK_QUEUE_FAMILY_FOREIGN_EXT) return RADV_QUEUE_FOREIGN; if (queue_family_index == VK_QUEUE_FAMILY_IGNORED) return RADV_QUEUE_IGNORED; @@ -779,7 +770,7 @@ vk_queue_to_radv(const struct radv_physical_device *phys_dev, int queue_family_i } enum amd_ip_type radv_queue_family_to_ring(const struct radv_physical_device *physical_device, - enum radv_queue_family f); + enum radv_queue_family f); static inline bool radv_has_uvd(struct radv_physical_device *phys_dev) @@ -858,8 +849,7 @@ int radv_queue_init(struct radv_device *device, struct radv_queue *queue, int id void radv_queue_finish(struct radv_queue *queue); -enum radeon_ctx_priority -radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoKHR *pObj); +enum radeon_ctx_priority radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoKHR *pObj); struct radv_shader_dma_submission { struct list_head list; @@ -1141,8 +1131,7 @@ struct radv_device_memory { #endif }; -void radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device, - struct radeon_winsys_bo *bo); +void radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device, struct radeon_winsys_bo *bo); void radv_device_memory_finish(struct radv_device_memory *mem); struct radv_descriptor_range { @@ -1241,8 +1230,8 @@ struct radv_buffer { VkDeviceSize offset; }; -void radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device, - struct radeon_winsys_bo *bo, uint64_t size, uint64_t offset); +void radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device, struct radeon_winsys_bo *bo, + uint64_t size, uint64_t offset); void radv_buffer_finish(struct radv_buffer *buffer); enum radv_dynamic_state_bits { @@ -1396,13 +1385,11 @@ enum radv_cmd_flush_bits { RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 15, RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 16, - RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = - (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META | - RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META), + RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META | + RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META), - RADV_CMD_FLUSH_ALL_COMPUTE = - (RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE | - RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_WB_L2 | RADV_CMD_FLAG_CS_PARTIAL_FLUSH), + RADV_CMD_FLUSH_ALL_COMPUTE = (RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE | + RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_WB_L2 | RADV_CMD_FLAG_CS_PARTIAL_FLUSH), }; enum radv_nggc_settings { @@ -1834,9 +1821,9 @@ struct radv_cmd_buffer { * The follower writes the value, and the leader waits. 
*/ struct { - uint64_t va; /* Virtual address of the semaphore. */ - uint32_t leader_value; /* Current value of the leader. */ - uint32_t emitted_leader_value; /* Emitted value emitted by the leader. */ + uint64_t va; /* Virtual address of the semaphore. */ + uint32_t leader_value; /* Current value of the leader. */ + uint32_t emitted_leader_value; /* Emitted value emitted by the leader. */ } sem; } gang; @@ -1899,8 +1886,7 @@ struct radv_dispatch_info { uint64_t va; }; -void radv_compute_dispatch(struct radv_cmd_buffer *cmd_buffer, - const struct radv_dispatch_info *info); +void radv_compute_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info); struct radv_image; struct radv_image_view; @@ -1914,30 +1900,23 @@ void si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs); void cik_create_gfx_config(struct radv_device *device); -void si_write_scissors(struct radeon_cmdbuf *cs, int count, const VkRect2D *scissors, - const VkViewport *viewports); +void si_write_scissors(struct radeon_cmdbuf *cs, int count, const VkRect2D *scissors, const VkViewport *viewports); -void si_write_guardband(struct radeon_cmdbuf *cs, int count, const VkViewport *viewports, - unsigned rast_prim, unsigned polygon_mode, float line_width); +void si_write_guardband(struct radeon_cmdbuf *cs, int count, const VkViewport *viewports, unsigned rast_prim, + unsigned polygon_mode, float line_width); -VkResult radv_create_shadow_regs_preamble(const struct radv_device *device, - struct radv_queue_state *queue_state); -void radv_destroy_shadow_regs_preamble(struct radv_queue_state *queue_state, - struct radeon_winsys *ws); +VkResult radv_create_shadow_regs_preamble(const struct radv_device *device, struct radv_queue_state *queue_state); +void radv_destroy_shadow_regs_preamble(struct radv_queue_state *queue_state, struct radeon_winsys *ws); void radv_emit_shadow_regs_preamble(struct radeon_cmdbuf *cs, const struct radv_device *device, struct radv_queue_state *queue_state); -VkResult radv_init_shadowed_regs_buffer_state(const struct radv_device *device, - struct radv_queue *queue); - -uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw, - bool indirect_draw, bool count_from_stream_output, - uint32_t draw_vertex_count, unsigned topology, - bool prim_restart_enable, unsigned patch_control_points, - unsigned num_tess_patches); -void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, bool is_mec, - unsigned event, unsigned event_flags, unsigned dst_sel, - unsigned data_sel, uint64_t va, uint32_t new_fence, - uint64_t gfx9_eop_bug_va); +VkResult radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct radv_queue *queue); + +uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw, bool indirect_draw, + bool count_from_stream_output, uint32_t draw_vertex_count, unsigned topology, + bool prim_restart_enable, unsigned patch_control_points, unsigned num_tess_patches); +void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, bool is_mec, unsigned event, + unsigned event_flags, unsigned dst_sel, unsigned data_sel, uint64_t va, + uint32_t new_fence, uint64_t gfx9_eop_bug_va); struct radv_vgt_shader_key { uint8_t tess : 1; @@ -1952,22 +1931,18 @@ struct radv_vgt_shader_key { uint8_t vs_wave32 : 1; }; -void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref, - uint32_t mask); -void si_cs_emit_cache_flush(struct 
radeon_winsys *ws, struct radeon_cmdbuf *cs, - enum amd_gfx_level gfx_level, uint32_t *flush_cnt, uint64_t flush_va, - bool is_mec, enum radv_cmd_flush_bits flush_bits, +void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask); +void si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, + uint32_t *flush_cnt, uint64_t flush_va, bool is_mec, enum radv_cmd_flush_bits flush_bits, enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va); void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer); -void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible, - unsigned pred_op, uint64_t va); -void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va, - uint64_t size); -void si_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, - unsigned size, bool predicating); +void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible, unsigned pred_op, + uint64_t va); +void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va, uint64_t size); +void si_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, unsigned size, + bool predicating); void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size); -void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, - unsigned value); +void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, unsigned value); void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer); uint32_t radv_get_pa_su_sc_mode_cntl(const struct radv_cmd_buffer *cmd_buffer); @@ -1980,8 +1955,7 @@ bool radv_cmp_vs_prolog(const void *a_, const void *b_); uint32_t radv_hash_ps_epilog(const void *key_); bool radv_cmp_ps_epilog(const void *a_, const void *b_); -struct radv_ps_epilog_state -{ +struct radv_ps_epilog_state { uint8_t color_attachment_count; VkFormat color_attachment_formats[MAX_RTS]; @@ -1996,82 +1970,62 @@ struct radv_ps_epilog_key radv_generate_ps_epilog_key(const struct radv_device * const struct radv_ps_epilog_state *state, bool disable_mrt_compaction); -bool radv_needs_null_export_workaround(const struct radv_device *device, - const struct radv_shader *ps, unsigned custom_blend_mode); +bool radv_needs_null_export_workaround(const struct radv_device *device, const struct radv_shader *ps, + unsigned custom_blend_mode); void radv_cmd_buffer_reset_rendering(struct radv_cmd_buffer *cmd_buffer); -bool radv_cmd_buffer_upload_alloc_aligned(struct radv_cmd_buffer *cmd_buffer, unsigned size, - unsigned alignment, +bool radv_cmd_buffer_upload_alloc_aligned(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned alignment, unsigned *out_offset, void **ptr); -bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, - unsigned *out_offset, void **ptr); -bool radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, - const void *data, unsigned *out_offset); +bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned *out_offset, void **ptr); +bool radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, const void *data, + unsigned *out_offset); void radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, - const struct radv_graphics_pipeline *pipeline, - 
bool full_null_descriptors, void *vb_ptr); + const struct radv_graphics_pipeline *pipeline, bool full_null_descriptors, + void *vb_ptr); void radv_write_scissors(struct radv_cmd_buffer *cmd_buffer, struct radeon_cmdbuf *cs); -void radv_cmd_buffer_clear_attachment(struct radv_cmd_buffer *cmd_buffer, - const VkClearAttachment *attachment); -void radv_cmd_buffer_clear_rendering(struct radv_cmd_buffer *cmd_buffer, - const VkRenderingInfo *render_info); +void radv_cmd_buffer_clear_attachment(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *attachment); +void radv_cmd_buffer_clear_rendering(struct radv_cmd_buffer *cmd_buffer, const VkRenderingInfo *render_info); void radv_cmd_buffer_resolve_rendering(struct radv_cmd_buffer *cmd_buffer); -void radv_cmd_buffer_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, - struct radv_image_view *src_iview, - VkImageLayout src_layout, - struct radv_image_view *dst_iview, - VkImageLayout dst_layout, - const VkImageResolve2 *region); -void radv_depth_stencil_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, - VkImageAspectFlags aspects, +void radv_cmd_buffer_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview, + VkImageLayout src_layout, struct radv_image_view *dst_iview, + VkImageLayout dst_layout, const VkImageResolve2 *region); +void radv_depth_stencil_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlags aspects, VkResolveModeFlagBits resolve_mode); -void radv_cmd_buffer_resolve_rendering_fs(struct radv_cmd_buffer *cmd_buffer, - struct radv_image_view *src_iview, - VkImageLayout src_layout, - struct radv_image_view *dst_iview, - VkImageLayout dst_layout); -void radv_depth_stencil_resolve_rendering_fs(struct radv_cmd_buffer *cmd_buffer, - VkImageAspectFlags aspects, +void radv_cmd_buffer_resolve_rendering_fs(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview, + VkImageLayout src_layout, struct radv_image_view *dst_iview, + VkImageLayout dst_layout); +void radv_depth_stencil_resolve_rendering_fs(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlags aspects, VkResolveModeFlagBits resolve_mode); void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples); unsigned radv_get_default_max_sample_dist(int log_samples); void radv_device_init_msaa(struct radv_device *device); VkResult radv_device_init_vrs_state(struct radv_device *device); -void radv_emit_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va, - uint32_t imm); +void radv_emit_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t imm); -void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, - const struct radv_image_view *iview, - VkClearDepthStencilValue ds_clear_value, - VkImageAspectFlags aspects); +void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview, + VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects); -void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, - const struct radv_image_view *iview, int cb_idx, - uint32_t color_values[2]); +void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview, + int cb_idx, uint32_t color_values[2]); -bool radv_image_use_dcc_image_stores(const struct radv_device *device, - const struct radv_image *image); -bool radv_image_use_dcc_predication(const struct radv_device *device, - const struct radv_image *image); +bool 
radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image); +bool radv_image_use_dcc_predication(const struct radv_device *device, const struct radv_image *image); void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range, bool value); void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range, bool value); -enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, - VkAccessFlags2 src_flags, +enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 src_flags, const struct radv_image *image); -enum radv_cmd_flush_bits radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, - VkAccessFlags2 dst_flags, +enum radv_cmd_flush_bits radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 dst_flags, const struct radv_image *image); uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, struct radeon_winsys_bo *bo, uint64_t va, uint64_t size, uint32_t value); void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *src_bo, - struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset, - uint64_t size); + struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset, uint64_t size); void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer); bool radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD); @@ -2100,8 +2054,8 @@ radv_emit_shader_pointer_body(struct radv_device *device, struct radeon_cmdbuf * } static inline void -radv_emit_shader_pointer(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t sh_offset, - uint64_t va, bool global) +radv_emit_shader_pointer(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t sh_offset, uint64_t va, + bool global) { bool use_32bit_pointers = !global; @@ -2122,14 +2076,12 @@ radv_get_descriptors_state(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoi } static inline const struct radv_push_constant_state * -radv_get_push_constants_state(const struct radv_cmd_buffer *cmd_buffer, - VkPipelineBindPoint bind_point) +radv_get_push_constants_state(const struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point) { return &cmd_buffer->push_constant_state[vk_to_bind_point(bind_point)]; } -void -radv_get_viewport_xform(const VkViewport *viewport, float scale[3], float translate[3]); +void radv_get_viewport_xform(const VkViewport *viewport, float scale[3], float translate[3]); /* * Takes x,y,z as exact numbers of invocations, instead of blocks. @@ -2137,11 +2089,9 @@ radv_get_viewport_xform(const VkViewport *viewport, float scale[3], float transl * Limitations: Can't call normal dispatch functions without binding or rebinding * the compute pipeline. 
*/ -void radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, - uint32_t z); +void radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z); -void radv_indirect_dispatch(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo, - uint64_t va); +void radv_indirect_dispatch(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo, uint64_t va); struct radv_event { struct vk_object_base base; @@ -2149,13 +2099,13 @@ struct radv_event { uint64_t *map; }; -#define RADV_HASH_SHADER_CS_WAVE32 (1 << 1) -#define RADV_HASH_SHADER_PS_WAVE32 (1 << 2) -#define RADV_HASH_SHADER_GE_WAVE32 (1 << 3) -#define RADV_HASH_SHADER_LLVM (1 << 4) -#define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8) -#define RADV_HASH_SHADER_USE_NGG_CULLING (1 << 13) -#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS (1 << 14) +#define RADV_HASH_SHADER_CS_WAVE32 (1 << 1) +#define RADV_HASH_SHADER_PS_WAVE32 (1 << 2) +#define RADV_HASH_SHADER_GE_WAVE32 (1 << 3) +#define RADV_HASH_SHADER_LLVM (1 << 4) +#define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8) +#define RADV_HASH_SHADER_USE_NGG_CULLING (1 << 13) +#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS (1 << 14) #define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS2 (1 << 15) #define RADV_HASH_SHADER_EMULATE_RT (1 << 16) #define RADV_HASH_SHADER_SPLIT_FMA (1 << 17) @@ -2167,19 +2117,17 @@ struct radv_event { struct radv_pipeline_key; struct radv_ray_tracing_group; -void radv_pipeline_stage_init(const VkPipelineShaderStageCreateInfo *sinfo, - struct radv_pipeline_stage *out_stage, gl_shader_stage stage); +void radv_pipeline_stage_init(const VkPipelineShaderStageCreateInfo *sinfo, struct radv_pipeline_stage *out_stage, + gl_shader_stage stage); -void radv_hash_shaders(unsigned char *hash, const struct radv_pipeline_stage *stages, - uint32_t stage_count, const struct radv_pipeline_layout *layout, - const struct radv_pipeline_key *key, uint32_t flags); +void radv_hash_shaders(unsigned char *hash, const struct radv_pipeline_stage *stages, uint32_t stage_count, + const struct radv_pipeline_layout *layout, const struct radv_pipeline_key *key, uint32_t flags); -void radv_hash_rt_stages(struct mesa_sha1 *ctx, const VkPipelineShaderStageCreateInfo *stages, - unsigned stage_count); +void radv_hash_rt_stages(struct mesa_sha1 *ctx, const VkPipelineShaderStageCreateInfo *stages, unsigned stage_count); void radv_hash_rt_shaders(unsigned char *hash, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, - const struct radv_pipeline_key *key, - const struct radv_ray_tracing_group *groups, uint32_t flags); + const struct radv_pipeline_key *key, const struct radv_ray_tracing_group *groups, + uint32_t flags); uint32_t radv_get_hash_flags(const struct radv_device *device, bool stats); @@ -2188,16 +2136,16 @@ bool radv_enable_rt(const struct radv_physical_device *pdevice, bool rt_pipeline bool radv_emulate_rt(const struct radv_physical_device *pdevice); enum { - RADV_RT_STAGE_BITS = (VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR | - VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR | - VK_SHADER_STAGE_INTERSECTION_BIT_KHR | VK_SHADER_STAGE_CALLABLE_BIT_KHR) + RADV_RT_STAGE_BITS = + (VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | + VK_SHADER_STAGE_MISS_BIT_KHR | VK_SHADER_STAGE_INTERSECTION_BIT_KHR | VK_SHADER_STAGE_CALLABLE_BIT_KHR) }; #define RADV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1) -#define radv_foreach_stage(stage, stage_bits) \ - 
for (gl_shader_stage stage, __tmp = (gl_shader_stage)((stage_bits)&RADV_STAGE_MASK); \ - stage = ffs(__tmp) - 1, __tmp; __tmp &= ~(1 << (stage))) +#define radv_foreach_stage(stage, stage_bits) \ + for (gl_shader_stage stage, __tmp = (gl_shader_stage)((stage_bits)&RADV_STAGE_MASK); stage = ffs(__tmp) - 1, __tmp; \ + __tmp &= ~(1 << (stage))) extern const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS]; unsigned radv_format_meta_fs_key(struct radv_device *device, VkFormat format); @@ -2232,9 +2180,9 @@ struct radv_pipeline_group_handle { }; enum radv_depth_clamp_mode { - RADV_DEPTH_CLAMP_MODE_VIEWPORT = 0, /* Clamp to the viewport min/max depth bounds */ - RADV_DEPTH_CLAMP_MODE_ZERO_TO_ONE = 1, /* Clamp between 0.0f and 1.0f */ - RADV_DEPTH_CLAMP_MODE_DISABLED = 2, /* Disable depth clamping */ + RADV_DEPTH_CLAMP_MODE_VIEWPORT = 0, /* Clamp to the viewport min/max depth bounds */ + RADV_DEPTH_CLAMP_MODE_ZERO_TO_ONE = 1, /* Clamp between 0.0f and 1.0f */ + RADV_DEPTH_CLAMP_MODE_DISABLED = 2, /* Disable depth clamping */ }; struct radv_pipeline { @@ -2398,12 +2346,11 @@ struct radv_graphics_lib_pipeline { VkPipelineShaderStageCreateInfo *stages; }; -#define RADV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum) \ - static inline struct radv_##pipe_type##_pipeline * \ - radv_pipeline_to_##pipe_type(struct radv_pipeline *pipeline) \ - { \ - assert(pipeline->type == pipe_enum); \ - return (struct radv_##pipe_type##_pipeline *) pipeline; \ +#define RADV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum) \ + static inline struct radv_##pipe_type##_pipeline *radv_pipeline_to_##pipe_type(struct radv_pipeline *pipeline) \ + { \ + assert(pipeline->type == pipe_enum); \ + return (struct radv_##pipe_type##_pipeline *)pipeline; \ } RADV_DECL_PIPELINE_DOWNCAST(graphics, RADV_PIPELINE_GRAPHICS) @@ -2451,15 +2398,13 @@ struct radv_shader *radv_get_shader(struct radv_shader *const *shaders, gl_shade void radv_pipeline_emit_hw_cs(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs, const struct radv_shader *shader); -void radv_pipeline_emit_compute_state(const struct radv_physical_device *pdevice, - struct radeon_cmdbuf *cs, const struct radv_shader *shader); +void radv_pipeline_emit_compute_state(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs, + const struct radv_shader *shader); -bool radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size, - unsigned num_components, nir_intrinsic_instr *low, nir_intrinsic_instr *high, - void *data); +bool radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size, unsigned num_components, + nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data); -void radv_compute_pipeline_init(const struct radv_device *device, - struct radv_compute_pipeline *pipeline, +void radv_compute_pipeline_init(const struct radv_device *device, struct radv_compute_pipeline *pipeline, const struct radv_pipeline_layout *layout); struct radv_graphics_pipeline_create_info { @@ -2473,11 +2418,9 @@ struct radv_graphics_pipeline_create_info { }; struct radv_pipeline_key radv_generate_pipeline_key(const struct radv_device *device, - const struct radv_pipeline *pipeline, - VkPipelineCreateFlags flags); + const struct radv_pipeline *pipeline, VkPipelineCreateFlags flags); -void radv_pipeline_init(struct radv_device *device, struct radv_pipeline *pipeline, - enum radv_pipeline_type type); +void radv_pipeline_init(struct radv_device *device, struct radv_pipeline *pipeline, enum radv_pipeline_type type); 
VkResult radv_graphics_pipeline_create(VkDevice device, VkPipelineCache cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, @@ -2486,16 +2429,14 @@ VkResult radv_graphics_pipeline_create(VkDevice device, VkPipelineCache cache, VkResult radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkComputePipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkPipeline *pPipeline); + const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline); bool radv_pipeline_capture_shaders(const struct radv_device *device, VkPipelineCreateFlags flags); -bool radv_pipeline_capture_shader_stats(const struct radv_device *device, - VkPipelineCreateFlags flags); +bool radv_pipeline_capture_shader_stats(const struct radv_device *device, VkPipelineCreateFlags flags); -VkPipelineShaderStageCreateInfo * -radv_copy_shader_stage_create_info(struct radv_device *device, uint32_t stageCount, - const VkPipelineShaderStageCreateInfo *pStages, void *mem_ctx); +VkPipelineShaderStageCreateInfo *radv_copy_shader_stage_create_info(struct radv_device *device, uint32_t stageCount, + const VkPipelineShaderStageCreateInfo *pStages, + void *mem_ctx); bool radv_shader_need_indirect_descriptor_sets(const struct radv_shader *shader); @@ -2507,32 +2448,25 @@ void radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pip const VkAllocationCallbacks *allocator); struct vk_format_description; -uint32_t radv_translate_buffer_dataformat(const struct util_format_description *desc, - int first_non_void); -uint32_t radv_translate_buffer_numformat(const struct util_format_description *desc, - int first_non_void); +uint32_t radv_translate_buffer_dataformat(const struct util_format_description *desc, int first_non_void); +uint32_t radv_translate_buffer_numformat(const struct util_format_description *desc, int first_non_void); bool radv_is_buffer_format_supported(VkFormat format, bool *scaled); uint32_t radv_colorformat_endian_swap(uint32_t colorformat); unsigned radv_translate_colorswap(VkFormat format, bool do_endian_swap); uint32_t radv_translate_dbformat(VkFormat format); -uint32_t radv_translate_tex_dataformat(VkFormat format, const struct util_format_description *desc, - int first_non_void); -uint32_t radv_translate_tex_numformat(VkFormat format, const struct util_format_description *desc, - int first_non_void); -bool radv_format_pack_clear_color(VkFormat format, uint32_t clear_vals[2], - VkClearColorValue *value); -bool radv_is_storage_image_format_supported(const struct radv_physical_device *physical_device, - VkFormat format); -bool radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice, - VkFormat format, bool *blendable); +uint32_t radv_translate_tex_dataformat(VkFormat format, const struct util_format_description *desc, int first_non_void); +uint32_t radv_translate_tex_numformat(VkFormat format, const struct util_format_description *desc, int first_non_void); +bool radv_format_pack_clear_color(VkFormat format, uint32_t clear_vals[2], VkClearColorValue *value); +bool radv_is_storage_image_format_supported(const struct radv_physical_device *physical_device, VkFormat format); +bool radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice, VkFormat format, bool *blendable); bool radv_dcc_formats_compatible(enum amd_gfx_level gfx_level, VkFormat format1, VkFormat format2, bool *sign_reinterpret); bool radv_is_atomic_format_supported(VkFormat format); bool radv_device_supports_etc(const struct radv_physical_device 
*physical_device); static const VkImageUsageFlags RADV_IMAGE_USAGE_WRITE_BITS = - VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | - VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT; + VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | + VK_IMAGE_USAGE_STORAGE_BIT; struct radv_image_plane { VkFormat format; @@ -2581,8 +2515,7 @@ struct radv_image { struct radv_image_plane planes[0]; }; -struct ac_surf_info radv_get_ac_surf_info(struct radv_device *device, - const struct radv_image *image); +struct ac_surf_info radv_get_ac_surf_info(struct radv_device *device, const struct radv_image *image); /* Whether the image has a htile that is known consistent with the contents of * the image and is allowed to be in compressed form. @@ -2590,15 +2523,14 @@ struct ac_surf_info radv_get_ac_surf_info(struct radv_device *device, * If this is false reads that don't use the htile should be able to return * correct results. */ -bool radv_layout_is_htile_compressed(const struct radv_device *device, - const struct radv_image *image, VkImageLayout layout, - unsigned queue_mask); +bool radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image, + VkImageLayout layout, unsigned queue_mask); -bool radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image, - unsigned level, VkImageLayout layout, unsigned queue_mask); +bool radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image, unsigned level, + VkImageLayout layout, unsigned queue_mask); -bool radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image, - unsigned level, VkImageLayout layout, unsigned queue_mask); +bool radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image, unsigned level, + VkImageLayout layout, unsigned queue_mask); enum radv_fmask_compression { RADV_FMASK_COMPRESSION_NONE, @@ -2606,9 +2538,9 @@ enum radv_fmask_compression { RADV_FMASK_COMPRESSION_FULL, }; -enum radv_fmask_compression radv_layout_fmask_compression( - const struct radv_device *device, const struct radv_image *image, VkImageLayout layout, - unsigned queue_mask); +enum radv_fmask_compression radv_layout_fmask_compression(const struct radv_device *device, + const struct radv_image *image, VkImageLayout layout, + unsigned queue_mask); /** * Return whether the image has CMASK metadata for color surfaces. 
@@ -2634,8 +2566,7 @@ radv_image_has_fmask(const struct radv_image *image) static inline bool radv_image_has_dcc(const struct radv_image *image) { - return !(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) && - image->planes[0].surface.meta_offset; + return !(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) && image->planes[0].surface.meta_offset; } /** @@ -2671,8 +2602,7 @@ radv_image_has_CB_metadata(const struct radv_image *image) static inline bool radv_image_has_htile(const struct radv_image *image) { - return image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER && - image->planes[0].surface.meta_size; + return image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER && image->planes[0].surface.meta_size; } /** @@ -2703,8 +2633,7 @@ radv_htile_enabled(const struct radv_image *image, unsigned level) static inline bool radv_image_is_tc_compat_htile(const struct radv_image *image) { - return radv_image_has_htile(image) && - (image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE); + return radv_image_has_htile(image) && (image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE); } /** @@ -2824,23 +2753,20 @@ radv_image_get_iterate256(const struct radv_device *device, struct radv_image *i { /* ITERATE_256 is required for depth or stencil MSAA images that are TC-compatible HTILE. */ return device->physical_device->rad_info.gfx_level >= GFX10 && - (image->vk.usage & - (VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)) && + (image->vk.usage & (VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)) && radv_image_is_tc_compat_htile(image) && image->vk.samples > 1; } -unsigned radv_image_queue_family_mask(const struct radv_image *image, - enum radv_queue_family family, +unsigned radv_image_queue_family_mask(const struct radv_image *image, enum radv_queue_family family, enum radv_queue_family queue_family); bool radv_image_is_renderable(const struct radv_device *device, const struct radv_image *image); struct radeon_bo_metadata; -void radv_init_metadata(struct radv_device *device, struct radv_image *image, - struct radeon_bo_metadata *metadata); +void radv_init_metadata(struct radv_device *device, struct radv_image *image, struct radeon_bo_metadata *metadata); -void radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image, - uint64_t offset, uint32_t stride); +void radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image, uint64_t offset, + uint32_t stride); union radv_descriptor { struct { @@ -2883,29 +2809,25 @@ struct radv_image_create_info { const struct radeon_bo_metadata *bo_metadata; }; -VkResult -radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info, - const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info, - const struct VkVideoProfileListInfoKHR *profile_list, - struct radv_image *image); +VkResult radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info, + const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info, + const struct VkVideoProfileListInfoKHR *profile_list, struct radv_image *image); VkResult radv_image_create(VkDevice _device, const struct radv_image_create_info *info, const VkAllocationCallbacks *alloc, VkImage *pImage, bool is_internal); -bool radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext, - VkFormat format, VkImageCreateFlags flags, - bool *sign_reinterpret); +bool 
radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext, VkFormat format, + VkImageCreateFlags flags, bool *sign_reinterpret); bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format); VkResult radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info, - const VkNativeBufferANDROID *gralloc_info, - const VkAllocationCallbacks *alloc, VkImage *out_image_h); -VkResult radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem, - unsigned priority, + const VkNativeBufferANDROID *gralloc_info, const VkAllocationCallbacks *alloc, + VkImage *out_image_h); +VkResult radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem, unsigned priority, const VkImportAndroidHardwareBufferInfoANDROID *info); -VkResult radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem, - unsigned priority, const VkMemoryAllocateInfo *pAllocateInfo); +VkResult radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem, unsigned priority, + const VkMemoryAllocateInfo *pAllocateInfo); unsigned radv_ahb_format_for_vk_format(VkFormat vk_format); @@ -2921,8 +2843,7 @@ struct radv_image_view_extra_create_info { }; void radv_image_view_init(struct radv_image_view *view, struct radv_device *device, - const VkImageViewCreateInfo *pCreateInfo, - VkImageCreateFlags img_create_flags, + const VkImageViewCreateInfo *pCreateInfo, VkImageCreateFlags img_create_flags, const struct radv_image_view_extra_create_info *extra_create_info); void radv_image_view_finish(struct radv_image_view *iview); @@ -2961,8 +2882,7 @@ struct radv_resolve_barrier { VkAccessFlags2 dst_access_mask; }; -void radv_emit_resolve_barrier(struct radv_cmd_buffer *cmd_buffer, - const struct radv_resolve_barrier *barrier); +void radv_emit_resolve_barrier(struct radv_cmd_buffer *cmd_buffer, const struct radv_resolve_barrier *barrier); VkResult radv_device_init_meta(struct radv_device *device); void radv_device_finish_meta(struct radv_device *device); @@ -2994,22 +2914,19 @@ struct radv_pc_query_pool { }; void radv_pc_deinit_query_pool(struct radv_pc_query_pool *pool); -VkResult radv_pc_init_query_pool(struct radv_physical_device *pdevice, - const VkQueryPoolCreateInfo *pCreateInfo, +VkResult radv_pc_init_query_pool(struct radv_physical_device *pdevice, const VkQueryPoolCreateInfo *pCreateInfo, struct radv_pc_query_pool *pool); -void radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, - uint64_t va); -void radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, - uint64_t va); +void radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va); +void radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va); void radv_pc_get_results(const struct radv_pc_query_pool *pc_pool, const uint64_t *data, void *out); -#define VL_MACROBLOCK_WIDTH 16 +#define VL_MACROBLOCK_WIDTH 16 #define VL_MACROBLOCK_HEIGHT 16 struct radv_vid_mem { struct radv_device_memory *mem; - VkDeviceSize offset; - VkDeviceSize size; + VkDeviceSize offset; + VkDeviceSize size; }; struct radv_video_session { @@ -3018,11 +2935,7 @@ struct radv_video_session { uint32_t stream_handle; unsigned stream_type; bool interlaced; - enum { - DPB_MAX_RES = 0, - DPB_DYNAMIC_TIER_1, - DPB_DYNAMIC_TIER_2 - } dpb_type; + enum { DPB_MAX_RES = 0, DPB_DYNAMIC_TIER_1, DPB_DYNAMIC_TIER_2 } dpb_type; unsigned db_alignment; 
struct radv_vid_mem sessionctx; @@ -3036,15 +2949,15 @@ struct radv_video_session_params { }; /* needed for ac_gpu_info codecs */ -#define RADV_VIDEO_FORMAT_UNKNOWN 0 -#define RADV_VIDEO_FORMAT_MPEG12 1 /**< MPEG1, MPEG2 */ -#define RADV_VIDEO_FORMAT_MPEG4 2 /**< DIVX, XVID */ -#define RADV_VIDEO_FORMAT_VC1 3 /**< WMV */ -#define RADV_VIDEO_FORMAT_MPEG4_AVC 4/**< H.264 */ -#define RADV_VIDEO_FORMAT_HEVC 5 /**< H.265 */ -#define RADV_VIDEO_FORMAT_JPEG 6 /**< JPEG */ -#define RADV_VIDEO_FORMAT_VP9 7 /**< VP9 */ -#define RADV_VIDEO_FORMAT_AV1 8 /**< AV1 */ +#define RADV_VIDEO_FORMAT_UNKNOWN 0 +#define RADV_VIDEO_FORMAT_MPEG12 1 /**< MPEG1, MPEG2 */ +#define RADV_VIDEO_FORMAT_MPEG4 2 /**< DIVX, XVID */ +#define RADV_VIDEO_FORMAT_VC1 3 /**< WMV */ +#define RADV_VIDEO_FORMAT_MPEG4_AVC 4 /**< H.264 */ +#define RADV_VIDEO_FORMAT_HEVC 5 /**< H.265 */ +#define RADV_VIDEO_FORMAT_JPEG 6 /**< JPEG */ +#define RADV_VIDEO_FORMAT_VP9 7 /**< VP9 */ +#define RADV_VIDEO_FORMAT_AV1 8 /**< AV1 */ bool radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs); @@ -3057,24 +2970,20 @@ void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindP void radv_cmd_update_descriptor_sets(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, VkDescriptorSet overrideSet, uint32_t descriptorWriteCount, - const VkWriteDescriptorSet *pDescriptorWrites, - uint32_t descriptorCopyCount, + const VkWriteDescriptorSet *pDescriptorWrites, uint32_t descriptorCopyCount, const VkCopyDescriptorSet *pDescriptorCopies); -void radv_cmd_update_descriptor_set_with_template(struct radv_device *device, - struct radv_cmd_buffer *cmd_buffer, +void radv_cmd_update_descriptor_set_with_template(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, struct radv_descriptor_set *set, VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData); -void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, - VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout, - uint32_t set, uint32_t descriptorWriteCount, +void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout _layout, uint32_t set, uint32_t descriptorWriteCount, const VkWriteDescriptorSet *pDescriptorWrites); -void radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, - VkFormat vk_format, unsigned offset, unsigned range, - uint32_t *state); +void radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFormat vk_format, unsigned offset, + unsigned range, uint32_t *state); uint32_t radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range, uint32_t value); @@ -3087,26 +2996,21 @@ struct radv_shader_args; struct radv_nir_compiler_options; struct radv_shader_info; -void llvm_compile_shader(const struct radv_nir_compiler_options *options, - const struct radv_shader_info *info, unsigned shader_count, - struct nir_shader *const *shaders, struct radv_shader_binary **binary, +void llvm_compile_shader(const struct radv_nir_compiler_options *options, const struct radv_shader_info *info, + unsigned shader_count, struct nir_shader *const *shaders, struct radv_shader_binary **binary, const struct radv_shader_args *args); /* radv_shader_info.h */ struct radv_shader_info; -void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir, - gl_shader_stage next_stage, - const struct radv_pipeline_layout *layout, - const struct 
radv_pipeline_key *pipeline_key, - const enum radv_pipeline_type pipeline_type, - bool consider_force_vrs, +void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir, gl_shader_stage next_stage, + const struct radv_pipeline_layout *layout, const struct radv_pipeline_key *pipeline_key, + const enum radv_pipeline_type pipeline_type, bool consider_force_vrs, struct radv_shader_info *info); void radv_nir_shader_info_init(struct radv_shader_info *info); -void radv_nir_shader_info_link(struct radv_device *device, - const struct radv_pipeline_key *pipeline_key, +void radv_nir_shader_info_link(struct radv_device *device, const struct radv_pipeline_key *pipeline_key, struct radv_pipeline_stage *stages); bool radv_sqtt_init(struct radv_device *device); @@ -3115,13 +3019,11 @@ bool radv_begin_sqtt(struct radv_queue *queue); bool radv_end_sqtt(struct radv_queue *queue); bool radv_get_sqtt_trace(struct radv_queue *queue, struct ac_sqtt_trace *sqtt_trace); void radv_reset_sqtt_trace(struct radv_device *device); -void radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data, - uint32_t num_dwords); +void radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data, uint32_t num_dwords); bool radv_is_instruction_timing_enabled(void); bool radv_sqtt_sample_clocks(struct radv_device *device); -void radv_emit_inhibit_clockgating(const struct radv_device *device, struct radeon_cmdbuf *cs, - bool inhibit); +void radv_emit_inhibit_clockgating(const struct radv_device *device, struct radeon_cmdbuf *cs, bool inhibit); void radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf *cs, bool enable); int radv_rra_trace_frame(void); @@ -3133,69 +3035,52 @@ void radv_rra_trace_init(struct radv_device *device); VkResult radv_rra_dump_trace(VkQueue vk_queue, char *filename); void radv_rra_trace_finish(VkDevice vk_device, struct radv_rra_trace_data *data); -bool radv_sdma_copy_image(struct radv_device *device, struct radeon_cmdbuf *cs, - struct radv_image *image, struct radv_buffer *buffer, - const VkBufferImageCopy2 *region); -void radv_sdma_copy_buffer(struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t src_va, - uint64_t dst_va, uint64_t size); +bool radv_sdma_copy_image(struct radv_device *device, struct radeon_cmdbuf *cs, struct radv_image *image, + struct radv_buffer *buffer, const VkBufferImageCopy2 *region); +void radv_sdma_copy_buffer(struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t src_va, uint64_t dst_va, + uint64_t size); void radv_memory_trace_init(struct radv_device *device); -void radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo, - uint32_t size, bool is_internal); +void radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo, uint32_t size, bool is_internal); void radv_rmv_log_bo_destroy(struct radv_device *device, struct radeon_winsys_bo *bo); void radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool is_internal, VkMemoryAllocateFlags alloc_flags); void radv_rmv_log_buffer_bind(struct radv_device *device, VkBuffer _buffer); -void radv_rmv_log_image_create(struct radv_device *device, const VkImageCreateInfo *create_info, - bool is_internal, VkImage _image); +void radv_rmv_log_image_create(struct radv_device *device, const VkImageCreateInfo *create_info, bool is_internal, + VkImage _image); void radv_rmv_log_image_bind(struct radv_device *device, VkImage _image); void 
radv_rmv_log_query_pool_create(struct radv_device *device, VkQueryPool pool, bool is_internal); void radv_rmv_log_command_buffer_bo_create(struct radv_device *device, struct radeon_winsys_bo *bo, - uint32_t executable_size, uint32_t data_size, - uint32_t scratch_size); -void radv_rmv_log_command_buffer_bo_destroy(struct radv_device *device, - struct radeon_winsys_bo *bo); -void radv_rmv_log_border_color_palette_create(struct radv_device *device, - struct radeon_winsys_bo *bo); -void radv_rmv_log_border_color_palette_destroy(struct radv_device *device, - struct radeon_winsys_bo *bo); -void radv_rmv_log_sparse_add_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, - uint64_t offset); -void radv_rmv_log_sparse_remove_residency(struct radv_device *device, - struct radeon_winsys_bo *src_bo, uint64_t offset); -void radv_rmv_log_descriptor_pool_create(struct radv_device *device, - const VkDescriptorPoolCreateInfo *create_info, + uint32_t executable_size, uint32_t data_size, uint32_t scratch_size); +void radv_rmv_log_command_buffer_bo_destroy(struct radv_device *device, struct radeon_winsys_bo *bo); +void radv_rmv_log_border_color_palette_create(struct radv_device *device, struct radeon_winsys_bo *bo); +void radv_rmv_log_border_color_palette_destroy(struct radv_device *device, struct radeon_winsys_bo *bo); +void radv_rmv_log_sparse_add_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, uint64_t offset); +void radv_rmv_log_sparse_remove_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, uint64_t offset); +void radv_rmv_log_descriptor_pool_create(struct radv_device *device, const VkDescriptorPoolCreateInfo *create_info, VkDescriptorPool pool, bool is_internal); void radv_rmv_log_graphics_pipeline_create(struct radv_device *device, VkPipelineCreateFlags flags, struct radv_pipeline *pipeline, bool is_internal); void radv_rmv_log_compute_pipeline_create(struct radv_device *device, VkPipelineCreateFlags flags, struct radv_pipeline *pipeline, bool is_internal); -void radv_rmv_log_event_create(struct radv_device *device, VkEvent event, VkEventCreateFlags flags, - bool is_internal); +void radv_rmv_log_event_create(struct radv_device *device, VkEvent event, VkEventCreateFlags flags, bool is_internal); void radv_rmv_log_resource_destroy(struct radv_device *device, uint64_t handle); void radv_rmv_log_submit(struct radv_device *device, enum amd_ip_type type); -void radv_rmv_fill_device_info(const struct radv_physical_device *device, - struct vk_rmv_device_info *info); +void radv_rmv_fill_device_info(const struct radv_physical_device *device, struct vk_rmv_device_info *info); void radv_rmv_collect_trace_events(struct radv_device *device); void radv_memory_trace_finish(struct radv_device *device); VkResult radv_create_buffer(struct radv_device *device, const VkBufferCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer, - bool is_internal); + const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer, bool is_internal); VkResult radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAllocateInfo, - const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem, - bool is_internal); -VkResult radv_create_query_pool(struct radv_device *device, - const VkQueryPoolCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, VkQueryPool *pQueryPool, - bool is_internal); -VkResult radv_create_descriptor_pool(struct radv_device *device, - const VkDescriptorPoolCreateInfo *pCreateInfo, - const 
VkAllocationCallbacks *pAllocator, - VkDescriptorPool *pDescriptorPool, bool is_internal); + const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem, bool is_internal); +VkResult radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkQueryPool *pQueryPool, bool is_internal); +VkResult radv_create_descriptor_pool(struct radv_device *device, const VkDescriptorPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkDescriptorPool *pDescriptorPool, + bool is_internal); VkResult radv_create_event(struct radv_device *device, const VkEventCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, VkEvent *pEvent, - bool is_internal); + const VkAllocationCallbacks *pAllocator, VkEvent *pEvent, bool is_internal); /* radv_sqtt_layer_.c */ struct radv_barrier_data { @@ -3243,20 +3128,16 @@ void radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer); void radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer); void radv_describe_draw(struct radv_cmd_buffer *cmd_buffer); void radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z); -void radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer, - VkImageAspectFlagBits aspects); +void radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlagBits aspects); void radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer); void radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer); void radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer); -void radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, - enum rgp_barrier_reason reason); +void radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, enum rgp_barrier_reason reason); void radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer); void radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer); -void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, - const struct radv_barrier_data *barrier); +void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, const struct radv_barrier_data *barrier); -void radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, - struct radv_graphics_pipeline *pipeline); +void radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv_graphics_pipeline *pipeline); struct radv_indirect_command_layout { struct vk_object_base base; @@ -3286,8 +3167,7 @@ struct radv_indirect_command_layout { uint32_t radv_get_indirect_cmdbuf_size(const VkGeneratedCommandsInfoNV *cmd_info); -void radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, - const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo); +void radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo); static inline uint32_t si_conv_prim_to_gs_out(uint32_t topology, bool is_ngg) @@ -3539,29 +3419,25 @@ si_translate_blend_factor(enum amd_gfx_level gfx_level, VkBlendFactor factor) case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: return V_028780_BLEND_ONE_MINUS_DST_ALPHA; case VK_BLEND_FACTOR_CONSTANT_COLOR: - return gfx_level >= GFX11 ? V_028780_BLEND_CONSTANT_COLOR_GFX11 - : V_028780_BLEND_CONSTANT_COLOR_GFX6; + return gfx_level >= GFX11 ? V_028780_BLEND_CONSTANT_COLOR_GFX11 : V_028780_BLEND_CONSTANT_COLOR_GFX6; case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: return gfx_level >= GFX11 ? 
V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX11 - : V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX6; + : V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX6; case VK_BLEND_FACTOR_CONSTANT_ALPHA: - return gfx_level >= GFX11 ? V_028780_BLEND_CONSTANT_ALPHA_GFX11 - : V_028780_BLEND_CONSTANT_ALPHA_GFX6; + return gfx_level >= GFX11 ? V_028780_BLEND_CONSTANT_ALPHA_GFX11 : V_028780_BLEND_CONSTANT_ALPHA_GFX6; case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: return gfx_level >= GFX11 ? V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX11 - : V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX6; + : V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX6; case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: return V_028780_BLEND_SRC_ALPHA_SATURATE; case VK_BLEND_FACTOR_SRC1_COLOR: return gfx_level >= GFX11 ? V_028780_BLEND_SRC1_COLOR_GFX11 : V_028780_BLEND_SRC1_COLOR_GFX6; case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: - return gfx_level >= GFX11 ? V_028780_BLEND_INV_SRC1_COLOR_GFX11 - : V_028780_BLEND_INV_SRC1_COLOR_GFX6; + return gfx_level >= GFX11 ? V_028780_BLEND_INV_SRC1_COLOR_GFX11 : V_028780_BLEND_INV_SRC1_COLOR_GFX6; case VK_BLEND_FACTOR_SRC1_ALPHA: return gfx_level >= GFX11 ? V_028780_BLEND_SRC1_ALPHA_GFX11 : V_028780_BLEND_SRC1_ALPHA_GFX6; case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: - return gfx_level >= GFX11 ? V_028780_BLEND_INV_SRC1_ALPHA_GFX11 - : V_028780_BLEND_INV_SRC1_ALPHA_GFX6; + return gfx_level >= GFX11 ? V_028780_BLEND_INV_SRC1_ALPHA_GFX11 : V_028780_BLEND_INV_SRC1_ALPHA_GFX6; default: return 0; } @@ -3576,18 +3452,15 @@ si_translate_blend_opt_factor(VkBlendFactor factor, bool is_alpha) case VK_BLEND_FACTOR_ONE: return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE; case VK_BLEND_FACTOR_SRC_COLOR: - return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0 - : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0; + return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0 : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0; case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: - return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1 - : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1; + return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1 : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1; case VK_BLEND_FACTOR_SRC_ALPHA: return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0; case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1; case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: - return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE - : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; + return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; default: return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; } @@ -3616,8 +3489,7 @@ static inline bool si_blend_factor_uses_dst(VkBlendFactor factor) { return factor == VK_BLEND_FACTOR_DST_COLOR || factor == VK_BLEND_FACTOR_DST_ALPHA || - factor == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || - factor == VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA || + factor == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || factor == VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA || factor == VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR; } @@ -3658,8 +3530,7 @@ radv_is_streamout_enabled(struct radv_cmd_buffer *cmd_buffer) struct radv_streamout_state *so = &cmd_buffer->state.streamout; /* Streamout must be enabled for the PRIMITIVES_GENERATED query to work. 
*/ - return (so->streamout_enabled || cmd_buffer->state.active_prims_gen_queries) && - !cmd_buffer->state.suspend_streamout; + return (so->streamout_enabled || cmd_buffer->state.active_prims_gen_queries) && !cmd_buffer->state.suspend_streamout; } /* @@ -3688,74 +3559,55 @@ radv_use_llvm_for_stage(const struct radv_device *device, UNUSED gl_shader_stage static inline bool radv_has_shader_buffer_float_minmax(const struct radv_physical_device *pdevice, unsigned bitsize) { - return (pdevice->rad_info.gfx_level <= GFX7 && !pdevice->use_llvm) || - pdevice->rad_info.gfx_level == GFX10 || pdevice->rad_info.gfx_level == GFX10_3 || - (pdevice->rad_info.gfx_level == GFX11 && bitsize == 32); + return (pdevice->rad_info.gfx_level <= GFX7 && !pdevice->use_llvm) || pdevice->rad_info.gfx_level == GFX10 || + pdevice->rad_info.gfx_level == GFX10_3 || (pdevice->rad_info.gfx_level == GFX11 && bitsize == 32); } /* radv_perfcounter.c */ void radv_perfcounter_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders); void radv_perfcounter_emit_spm_reset(struct radeon_cmdbuf *cs); -void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf *cs, - int family); -void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs, - int family); +void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf *cs, int family); +void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family); /* radv_spm.c */ bool radv_spm_init(struct radv_device *device); void radv_spm_finish(struct radv_device *device); void radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs); -void radv_destroy_graphics_pipeline(struct radv_device *device, - struct radv_graphics_pipeline *pipeline); -void radv_destroy_graphics_lib_pipeline(struct radv_device *device, - struct radv_graphics_lib_pipeline *pipeline); -void radv_destroy_compute_pipeline(struct radv_device *device, - struct radv_compute_pipeline *pipeline); -void radv_destroy_ray_tracing_pipeline(struct radv_device *device, - struct radv_ray_tracing_pipeline *pipeline); +void radv_destroy_graphics_pipeline(struct radv_device *device, struct radv_graphics_pipeline *pipeline); +void radv_destroy_graphics_lib_pipeline(struct radv_device *device, struct radv_graphics_lib_pipeline *pipeline); +void radv_destroy_compute_pipeline(struct radv_device *device, struct radv_compute_pipeline *pipeline); +void radv_destroy_ray_tracing_pipeline(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline); -#define RADV_FROM_HANDLE(__radv_type, __name, __handle) \ - VK_FROM_HANDLE(__radv_type, __name, __handle) +#define RADV_FROM_HANDLE(__radv_type, __name, __handle) VK_FROM_HANDLE(__radv_type, __name, __handle) -VK_DEFINE_HANDLE_CASTS(radv_cmd_buffer, vk.base, VkCommandBuffer, - VK_OBJECT_TYPE_COMMAND_BUFFER) +VK_DEFINE_HANDLE_CASTS(radv_cmd_buffer, vk.base, VkCommandBuffer, VK_OBJECT_TYPE_COMMAND_BUFFER) VK_DEFINE_HANDLE_CASTS(radv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE) VK_DEFINE_HANDLE_CASTS(radv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE) -VK_DEFINE_HANDLE_CASTS(radv_physical_device, vk.base, VkPhysicalDevice, - VK_OBJECT_TYPE_PHYSICAL_DEVICE) +VK_DEFINE_HANDLE_CASTS(radv_physical_device, vk.base, VkPhysicalDevice, VK_OBJECT_TYPE_PHYSICAL_DEVICE) VK_DEFINE_HANDLE_CASTS(radv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE) VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer, vk.base, VkBuffer, VK_OBJECT_TYPE_BUFFER) 
-VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer_view, base, VkBufferView, - VK_OBJECT_TYPE_BUFFER_VIEW) -VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, base, VkDescriptorPool, - VK_OBJECT_TYPE_DESCRIPTOR_POOL) -VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set, header.base, VkDescriptorSet, - VK_OBJECT_TYPE_DESCRIPTOR_SET) +VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer_view, base, VkBufferView, VK_OBJECT_TYPE_BUFFER_VIEW) +VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, base, VkDescriptorPool, VK_OBJECT_TYPE_DESCRIPTOR_POOL) +VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set, header.base, VkDescriptorSet, VK_OBJECT_TYPE_DESCRIPTOR_SET) VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set_layout, vk.base, VkDescriptorSetLayout, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT) -VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, base, - VkDescriptorUpdateTemplate, +VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, base, VkDescriptorUpdateTemplate, VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE) -VK_DEFINE_NONDISP_HANDLE_CASTS(radv_device_memory, base, VkDeviceMemory, - VK_OBJECT_TYPE_DEVICE_MEMORY) +VK_DEFINE_NONDISP_HANDLE_CASTS(radv_device_memory, base, VkDeviceMemory, VK_OBJECT_TYPE_DEVICE_MEMORY) VK_DEFINE_NONDISP_HANDLE_CASTS(radv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT) VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE) -VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image_view, vk.base, VkImageView, - VK_OBJECT_TYPE_IMAGE_VIEW); +VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image_view, vk.base, VkImageView, VK_OBJECT_TYPE_IMAGE_VIEW); VK_DEFINE_NONDISP_HANDLE_CASTS(radv_indirect_command_layout, base, VkIndirectCommandsLayoutNV, VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV) -VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline, base, VkPipeline, - VK_OBJECT_TYPE_PIPELINE) -VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_layout, base, VkPipelineLayout, - VK_OBJECT_TYPE_PIPELINE_LAYOUT) -VK_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, base, VkQueryPool, - VK_OBJECT_TYPE_QUERY_POOL) -VK_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, base, VkSampler, - VK_OBJECT_TYPE_SAMPLER) +VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline, base, VkPipeline, VK_OBJECT_TYPE_PIPELINE) +VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_layout, base, VkPipelineLayout, VK_OBJECT_TYPE_PIPELINE_LAYOUT) +VK_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, base, VkQueryPool, VK_OBJECT_TYPE_QUERY_POOL) +VK_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, base, VkSampler, VK_OBJECT_TYPE_SAMPLER) VK_DEFINE_NONDISP_HANDLE_CASTS(radv_video_session, vk.base, VkVideoSessionKHR, VK_OBJECT_TYPE_VIDEO_SESSION_KHR) -VK_DEFINE_NONDISP_HANDLE_CASTS(radv_video_session_params, vk.base, VkVideoSessionParametersKHR, VK_OBJECT_TYPE_VIDEO_SESSION_PARAMETERS_KHR) +VK_DEFINE_NONDISP_HANDLE_CASTS(radv_video_session_params, vk.base, VkVideoSessionParametersKHR, + VK_OBJECT_TYPE_VIDEO_SESSION_PARAMETERS_KHR) #ifdef __cplusplus } diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index b9e74c0..e1fb44e 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -29,11 +29,11 @@ #include #include "bvh/bvh.h" +#include "meta/radv_meta.h" #include "nir/nir_builder.h" #include "util/u_atomic.h" #include "vulkan/vulkan_core.h" #include "radv_cs.h" -#include "meta/radv_meta.h" #include "radv_private.h" #include "sid.h" #include "vk_acceleration_structure.h" @@ -51,8 +51,8 @@ radv_get_pipelinestat_query_size(struct radv_device *device) } static void -radv_store_availability(nir_builder *b, nir_ssa_def *flags, 
nir_ssa_def *dst_buf, - nir_ssa_def *offset, nir_ssa_def *value32) +radv_store_availability(nir_builder *b, nir_ssa_def *flags, nir_ssa_def *dst_buf, nir_ssa_def *offset, + nir_ssa_def *value32) { nir_push_if(b, nir_test_mask(b, flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)); @@ -115,8 +115,7 @@ build_occlusion_query_shader(struct radv_device *device) b.shader->info.workgroup_size[0] = 64; nir_variable *result = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "result"); - nir_variable *outer_counter = - nir_local_variable_create(b.impl, glsl_int_type(), "outer_counter"); + nir_variable *outer_counter = nir_local_variable_create(b.impl, glsl_int_type(), "outer_counter"); nir_variable *start = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "start"); nir_variable *end = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "end"); nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available"); @@ -151,8 +150,7 @@ build_occlusion_query_shader(struct radv_device *device) nir_scoped_memory_barrier(&b, NIR_SCOPE_INVOCATION, NIR_MEMORY_ACQUIRE, nir_var_mem_ssbo); nir_ssa_def *load_offset = nir_iadd_imm(&b, input_base, rb_avail_offset); - nir_ssa_def *load = nir_load_ssbo(&b, 1, 32, src_buf, load_offset, .align_mul = 4, - .access = ACCESS_COHERENT); + nir_ssa_def *load = nir_load_ssbo(&b, 1, 32, src_buf, load_offset, .align_mul = 4, .access = ACCESS_COHERENT); nir_push_if(&b, nir_ige_imm(&b, load, 0x80000000)); { @@ -170,8 +168,7 @@ build_occlusion_query_shader(struct radv_device *device) radv_break_on_count(&b, outer_counter, nir_imm_int(&b, db_count)); nir_ssa_def *enabled_cond = - nir_iand_imm(&b, nir_ishl(&b, nir_imm_int64(&b, 1), current_outer_count), - enabled_rb_mask); + nir_iand_imm(&b, nir_ishl(&b, nir_imm_int64(&b, 1), current_outer_count), enabled_rb_mask); nir_push_if(&b, nir_i2b(&b, enabled_cond)); @@ -189,8 +186,7 @@ build_occlusion_query_shader(struct radv_device *device) nir_push_if(&b, nir_iand(&b, start_done, end_done)); nir_store_var(&b, result, - nir_iadd(&b, nir_load_var(&b, result), - nir_isub(&b, nir_load_var(&b, end), nir_load_var(&b, start))), + nir_iadd(&b, nir_load_var(&b, result), nir_isub(&b, nir_load_var(&b, end), nir_load_var(&b, start))), 0x1); nir_push_else(&b, NULL); @@ -204,10 +200,8 @@ build_occlusion_query_shader(struct radv_device *device) /* Store the result if complete or if partial results have been requested. 
*/ nir_ssa_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT); - nir_ssa_def *result_size = - nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4)); - nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), - nir_load_var(&b, available))); + nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4)); + nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), nir_load_var(&b, available))); nir_push_if(&b, result_is_64bit); @@ -215,8 +209,7 @@ build_occlusion_query_shader(struct radv_device *device) nir_push_else(&b, NULL); - nir_store_ssbo(&b, nir_u2u32(&b, nir_load_var(&b, result)), dst_buf, output_base, - .align_mul = 8); + nir_store_ssbo(&b, nir_u2u32(&b, nir_load_var(&b, result)), dst_buf, output_base, .align_mul = 8); nir_pop_if(&b, NULL); nir_pop_if(&b, NULL); @@ -277,10 +270,8 @@ build_pipeline_statistics_query_shader(struct radv_device *device) nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "pipeline_statistics_query"); b.shader->info.workgroup_size[0] = 64; - nir_variable *output_offset = - nir_local_variable_create(b.impl, glsl_int_type(), "output_offset"); - nir_variable *result = - nir_local_variable_create(b.impl, glsl_int64_t_type(), "result"); + nir_variable *output_offset = nir_local_variable_create(b.impl, glsl_int_type(), "output_offset"); + nir_variable *result = nir_local_variable_create(b.impl, glsl_int64_t_type(), "result"); nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4); nir_ssa_def *stats_mask = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 12); @@ -315,8 +306,7 @@ build_pipeline_statistics_query_shader(struct radv_device *device) nir_ssa_def *elem_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4)); nir_ssa_def *elem_count = nir_ushr_imm(&b, stats_mask, 16); - radv_store_availability(&b, flags, dst_buf, - nir_iadd(&b, output_base, nir_imul(&b, elem_count, elem_size)), + radv_store_availability(&b, flags, dst_buf, nir_iadd(&b, output_base, nir_imul(&b, elem_count, elem_size)), available32); nir_push_if(&b, nir_i2b(&b, available32)); @@ -339,12 +329,10 @@ build_pipeline_statistics_query_shader(struct radv_device *device) VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT))); { /* Compute the GDS result if needed. */ - nir_ssa_def *gds_start_offset = - nir_iadd(&b, input_base, nir_imm_int(&b, pipelinestat_block_size * 2)); + nir_ssa_def *gds_start_offset = nir_iadd(&b, input_base, nir_imm_int(&b, pipelinestat_block_size * 2)); nir_ssa_def *gds_start = nir_load_ssbo(&b, 1, 64, src_buf, gds_start_offset); - nir_ssa_def *gds_end_offset = - nir_iadd(&b, input_base, nir_imm_int(&b, pipelinestat_block_size * 2 + 8)); + nir_ssa_def *gds_end_offset = nir_iadd(&b, input_base, nir_imm_int(&b, pipelinestat_block_size * 2 + 8)); nir_ssa_def *gds_end = nir_load_ssbo(&b, 1, 64, src_buf, gds_end_offset); nir_ssa_def *ngg_gds_result = nir_isub(&b, gds_end, gds_start); @@ -364,8 +352,7 @@ build_pipeline_statistics_query_shader(struct radv_device *device) nir_pop_if(&b, NULL); - nir_store_var(&b, output_offset, nir_iadd(&b, nir_load_var(&b, output_offset), elem_size), - 0x1); + nir_store_var(&b, output_offset, nir_iadd(&b, nir_load_var(&b, output_offset), elem_size), 0x1); nir_pop_if(&b, NULL); } @@ -443,8 +430,7 @@ build_tfb_query_shader(struct radv_device *device) b.shader->info.workgroup_size[0] = 64; /* Create and initialize local variables. 
*/ - nir_variable *result = - nir_local_variable_create(b.impl, glsl_vector_type(GLSL_TYPE_UINT64, 2), "result"); + nir_variable *result = nir_local_variable_create(b.impl, glsl_vector_type(GLSL_TYPE_UINT64, 2), "result"); nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available"); nir_store_var(&b, result, nir_replicate(&b, nir_imm_int64(&b, 0), 2), 0x3); @@ -467,29 +453,23 @@ build_tfb_query_shader(struct radv_device *device) /* Load data from the query pool. */ nir_ssa_def *load1 = nir_load_ssbo(&b, 4, 32, src_buf, input_base, .align_mul = 32); - nir_ssa_def *load2 = - nir_load_ssbo(&b, 4, 32, src_buf, nir_iadd_imm(&b, input_base, 16), .align_mul = 16); + nir_ssa_def *load2 = nir_load_ssbo(&b, 4, 32, src_buf, nir_iadd_imm(&b, input_base, 16), .align_mul = 16); /* Check if result is available. */ nir_ssa_def *avails[2]; avails[0] = nir_iand(&b, nir_channel(&b, load1, 1), nir_channel(&b, load1, 3)); avails[1] = nir_iand(&b, nir_channel(&b, load2, 1), nir_channel(&b, load2, 3)); - nir_ssa_def *result_is_available = - nir_test_mask(&b, nir_iand(&b, avails[0], avails[1]), 0x80000000); + nir_ssa_def *result_is_available = nir_test_mask(&b, nir_iand(&b, avails[0], avails[1]), 0x80000000); /* Only compute result if available. */ nir_push_if(&b, result_is_available); /* Pack values. */ nir_ssa_def *packed64[4]; - packed64[0] = - nir_pack_64_2x32(&b, nir_trim_vector(&b, load1, 2)); - packed64[1] = - nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load1, 2), nir_channel(&b, load1, 3))); - packed64[2] = - nir_pack_64_2x32(&b, nir_trim_vector(&b, load2, 2)); - packed64[3] = - nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load2, 2), nir_channel(&b, load2, 3))); + packed64[0] = nir_pack_64_2x32(&b, nir_trim_vector(&b, load1, 2)); + packed64[1] = nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load1, 2), nir_channel(&b, load1, 3))); + packed64[2] = nir_pack_64_2x32(&b, nir_trim_vector(&b, load2, 2)); + packed64[3] = nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load2, 2), nir_channel(&b, load2, 3))); /* Compute result. */ nir_ssa_def *num_primitive_written = nir_isub(&b, packed64[3], packed64[1]); @@ -502,12 +482,10 @@ build_tfb_query_shader(struct radv_device *device) /* Determine if result is 64 or 32 bit. */ nir_ssa_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT); - nir_ssa_def *result_size = - nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16), nir_imm_int(&b, 8)); + nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16), nir_imm_int(&b, 8)); /* Store the result if complete or partial results have been requested. */ - nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), - nir_load_var(&b, available))); + nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), nir_load_var(&b, available))); /* Store result. */ nir_push_if(&b, result_is_64bit); @@ -590,8 +568,7 @@ build_timestamp_query_shader(struct radv_device *device) /* Pack the timestamp. */ nir_ssa_def *timestamp; - timestamp = - nir_pack_64_2x32(&b, nir_trim_vector(&b, load, 2)); + timestamp = nir_pack_64_2x32(&b, nir_trim_vector(&b, load, 2)); /* Check if result is available. */ nir_ssa_def *result_is_available = nir_i2b(&b, nir_ine_imm(&b, timestamp, TIMESTAMP_NOT_READY)); @@ -606,12 +583,10 @@ build_timestamp_query_shader(struct radv_device *device) /* Determine if result is 64 or 32 bit. 
*/ nir_ssa_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT); - nir_ssa_def *result_size = - nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4)); + nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4)); /* Store the result if complete or partial results have been requested. */ - nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), - nir_load_var(&b, available))); + nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), nir_load_var(&b, available))); /* Store result. */ nir_push_if(&b, result_is_64bit); @@ -632,7 +607,7 @@ build_timestamp_query_shader(struct radv_device *device) return b.shader; } -#define RADV_PGQ_STRIDE 32 +#define RADV_PGQ_STRIDE 32 #define RADV_PGQ_STRIDE_GDS (RADV_PGQ_STRIDE + 4 * 2) static nir_shader * @@ -679,8 +654,7 @@ build_pg_query_shader(struct radv_device *device) b.shader->info.workgroup_size[0] = 64; /* Create and initialize local variables. */ - nir_variable *result = - nir_local_variable_create(b.impl, glsl_uint64_t_type(), "result"); + nir_variable *result = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "result"); nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available"); nir_store_var(&b, result, nir_imm_int64(&b, 0), 0x1); @@ -696,19 +670,19 @@ build_pg_query_shader(struct radv_device *device) nir_ssa_def *global_id = get_global_ids(&b, 1); /* Determine if the query pool uses GDS for NGG. */ - nir_ssa_def *uses_gds = - nir_i2b(&b, nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20)); + nir_ssa_def *uses_gds = nir_i2b(&b, nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20)); /* Compute src/dst strides. */ - nir_ssa_def *input_stride = nir_bcsel(&b, uses_gds, nir_imm_int(&b, RADV_PGQ_STRIDE_GDS), nir_imm_int(&b, RADV_PGQ_STRIDE)); + nir_ssa_def *input_stride = + nir_bcsel(&b, uses_gds, nir_imm_int(&b, RADV_PGQ_STRIDE_GDS), nir_imm_int(&b, RADV_PGQ_STRIDE)); nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id); nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 16); nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id); /* Load data from the query pool. */ nir_ssa_def *load1 = nir_load_ssbo(&b, 2, 32, src_buf, input_base, .align_mul = 32); - nir_ssa_def *load2 = nir_load_ssbo( - &b, 2, 32, src_buf, nir_iadd(&b, input_base, nir_imm_int(&b, 16)), .align_mul = 16); + nir_ssa_def *load2 = + nir_load_ssbo(&b, 2, 32, src_buf, nir_iadd(&b, input_base, nir_imm_int(&b, 16)), .align_mul = 16); /* Check if result is available. */ nir_ssa_def *avails[2]; @@ -722,10 +696,8 @@ build_pg_query_shader(struct radv_device *device) /* Pack values. */ nir_ssa_def *packed64[2]; - packed64[0] = - nir_pack_64_2x32(&b, nir_trim_vector(&b, load1, 2)); - packed64[1] = - nir_pack_64_2x32(&b, nir_trim_vector(&b, load2, 2)); + packed64[0] = nir_pack_64_2x32(&b, nir_trim_vector(&b, load1, 2)); + packed64[1] = nir_pack_64_2x32(&b, nir_trim_vector(&b, load2, 2)); /* Compute result. */ nir_ssa_def *primitive_storage_needed = nir_isub(&b, packed64[1], packed64[0]); @@ -751,12 +723,10 @@ build_pg_query_shader(struct radv_device *device) /* Determine if result is 64 or 32 bit. 
*/ nir_ssa_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT); - nir_ssa_def *result_size = - nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16), nir_imm_int(&b, 8)); + nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16), nir_imm_int(&b, 8)); /* Store the result if complete or partial results have been requested. */ - nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), - nir_load_var(&b, available))); + nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), nir_load_var(&b, available))); /* Store result. */ nir_push_if(&b, result_is_64bit); @@ -815,8 +785,7 @@ radv_device_init_meta_query_state_internal(struct radv_device *device) }}; result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &occlusion_ds_create_info, - &device->meta_state.alloc, - &device->meta_state.query.ds_layout); + &device->meta_state.alloc, &device->meta_state.query.ds_layout); if (result != VK_SUCCESS) goto fail; @@ -828,9 +797,8 @@ radv_device_init_meta_query_state_internal(struct radv_device *device) .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20}, }; - result = - radv_CreatePipelineLayout(radv_device_to_handle(device), &occlusion_pl_create_info, - &device->meta_state.alloc, &device->meta_state.query.p_layout); + result = radv_CreatePipelineLayout(radv_device_to_handle(device), &occlusion_pl_create_info, + &device->meta_state.alloc, &device->meta_state.query.p_layout); if (result != VK_SUCCESS) goto fail; @@ -849,9 +817,9 @@ radv_device_init_meta_query_state_internal(struct radv_device *device) .layout = device->meta_state.query.p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &occlusion_vk_pipeline_info, NULL, - &device->meta_state.query.occlusion_query_pipeline); + result = + radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &occlusion_vk_pipeline_info, + NULL, &device->meta_state.query.occlusion_query_pipeline); if (result != VK_SUCCESS) goto fail; @@ -870,10 +838,9 @@ radv_device_init_meta_query_state_internal(struct radv_device *device) .layout = device->meta_state.query.p_layout, }; - result = radv_compute_pipeline_create( - radv_device_to_handle(device), device->meta_state.cache, - &pipeline_statistics_vk_pipeline_info, NULL, - &device->meta_state.query.pipeline_statistics_query_pipeline); + result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, + &pipeline_statistics_vk_pipeline_info, NULL, + &device->meta_state.query.pipeline_statistics_query_pipeline); if (result != VK_SUCCESS) goto fail; @@ -892,9 +859,8 @@ radv_device_init_meta_query_state_internal(struct radv_device *device) .layout = device->meta_state.query.p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &tfb_pipeline_info, NULL, - &device->meta_state.query.tfb_query_pipeline); + result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &tfb_pipeline_info, + NULL, &device->meta_state.query.tfb_query_pipeline); if (result != VK_SUCCESS) goto fail; @@ -913,9 +879,9 @@ radv_device_init_meta_query_state_internal(struct radv_device *device) .layout = device->meta_state.query.p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &timestamp_pipeline_info, NULL, - &device->meta_state.query.timestamp_query_pipeline); +
result = + radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &timestamp_pipeline_info, + NULL, &device->meta_state.query.timestamp_query_pipeline); if (result != VK_SUCCESS) goto fail; @@ -934,9 +900,8 @@ radv_device_init_meta_query_state_internal(struct radv_device *device) .layout = device->meta_state.query.p_layout, }; - result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, - &pg_pipeline_info, NULL, - &device->meta_state.query.pg_query_pipeline); + result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &pg_pipeline_info, + NULL, &device->meta_state.query.pg_query_pipeline); fail: ralloc_free(occlusion_cs); @@ -961,44 +926,39 @@ void radv_device_finish_meta_query_state(struct radv_device *device) { if (device->meta_state.query.tfb_query_pipeline) - radv_DestroyPipeline(radv_device_to_handle(device), - device->meta_state.query.tfb_query_pipeline, &device->meta_state.alloc); + radv_DestroyPipeline(radv_device_to_handle(device), device->meta_state.query.tfb_query_pipeline, + &device->meta_state.alloc); if (device->meta_state.query.pipeline_statistics_query_pipeline) - radv_DestroyPipeline(radv_device_to_handle(device), - device->meta_state.query.pipeline_statistics_query_pipeline, + radv_DestroyPipeline(radv_device_to_handle(device), device->meta_state.query.pipeline_statistics_query_pipeline, &device->meta_state.alloc); if (device->meta_state.query.occlusion_query_pipeline) - radv_DestroyPipeline(radv_device_to_handle(device), - device->meta_state.query.occlusion_query_pipeline, + radv_DestroyPipeline(radv_device_to_handle(device), device->meta_state.query.occlusion_query_pipeline, &device->meta_state.alloc); if (device->meta_state.query.timestamp_query_pipeline) - radv_DestroyPipeline(radv_device_to_handle(device), - device->meta_state.query.timestamp_query_pipeline, + radv_DestroyPipeline(radv_device_to_handle(device), device->meta_state.query.timestamp_query_pipeline, &device->meta_state.alloc); if (device->meta_state.query.pg_query_pipeline) - radv_DestroyPipeline(radv_device_to_handle(device), - device->meta_state.query.pg_query_pipeline, &device->meta_state.alloc); + radv_DestroyPipeline(radv_device_to_handle(device), device->meta_state.query.pg_query_pipeline, + &device->meta_state.alloc); if (device->meta_state.query.p_layout) radv_DestroyPipelineLayout(radv_device_to_handle(device), device->meta_state.query.p_layout, &device->meta_state.alloc); if (device->meta_state.query.ds_layout) - device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), - device->meta_state.query.ds_layout, - &device->meta_state.alloc); + device->vk.dispatch_table.DestroyDescriptorSetLayout( + radv_device_to_handle(device), device->meta_state.query.ds_layout, &device->meta_state.alloc); } static void -radv_query_shader(struct radv_cmd_buffer *cmd_buffer, VkPipeline *pipeline, - struct radeon_winsys_bo *src_bo, struct radeon_winsys_bo *dst_bo, - uint64_t src_offset, uint64_t dst_offset, uint32_t src_stride, - uint32_t dst_stride, size_t dst_size, uint32_t count, uint32_t flags, - uint32_t pipeline_stats_mask, uint32_t avail_offset, bool uses_gds) +radv_query_shader(struct radv_cmd_buffer *cmd_buffer, VkPipeline *pipeline, struct radeon_winsys_bo *src_bo, + struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset, uint32_t src_stride, + uint32_t dst_stride, size_t dst_size, uint32_t count, uint32_t flags, uint32_t pipeline_stats_mask, + uint32_t
avail_offset, bool uses_gds) { struct radv_device *device = cmd_buffer->device; struct radv_meta_saved_state saved_state; @@ -1016,8 +976,8 @@ radv_query_shader(struct radv_cmd_buffer *cmd_buffer, VkPipeline *pipeline, * affected by conditional rendering. */ radv_meta_save(&saved_state, cmd_buffer, - RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | - RADV_META_SAVE_DESCRIPTORS | RADV_META_SUSPEND_PREDICATING); + RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS | + RADV_META_SUSPEND_PREDICATING); uint64_t src_buffer_size = MAX2(src_stride * count, avail_offset + 4 * count - src_offset); uint64_t dst_buffer_size = dst_stride * (count - 1) + dst_size; @@ -1025,29 +985,27 @@ radv_query_shader(struct radv_cmd_buffer *cmd_buffer, VkPipeline *pipeline, radv_buffer_init(&src_buffer, device, src_bo, src_buffer_size, src_offset); radv_buffer_init(&dst_buffer, device, dst_bo, dst_buffer_size, dst_offset); - radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, - *pipeline); + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); radv_meta_push_descriptor_set( cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.query.p_layout, 0, /* set */ - 2, /* descriptorWriteCount */ - (VkWriteDescriptorSet[]){ - {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer), - .offset = 0, - .range = VK_WHOLE_SIZE}}, - {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&src_buffer), - .offset = 0, - .range = VK_WHOLE_SIZE}}}); + 2, /* descriptorWriteCount */ + (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer), + .offset = 0, + .range = VK_WHOLE_SIZE}}, + {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&src_buffer), + .offset = 0, + .range = VK_WHOLE_SIZE}}}); /* Encode the number of elements for easy access by the shader. */ pipeline_stats_mask &= 0x7ff; @@ -1105,16 +1063,15 @@ radv_destroy_query_pool(struct radv_device *device, const VkAllocationCallbacks VkResult radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, VkQueryPool *pQueryPool, - bool is_internal) + const VkAllocationCallbacks *pAllocator, VkQueryPool *pQueryPool, bool is_internal) { VkResult result; size_t pool_struct_size = pCreateInfo->queryType == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR ? 
sizeof(struct radv_pc_query_pool) : sizeof(struct radv_query_pool); - struct radv_query_pool *pool = vk_alloc2(&device->vk.alloc, pAllocator, pool_struct_size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + struct radv_query_pool *pool = + vk_alloc2(&device->vk.alloc, pAllocator, pool_struct_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!pool) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -1128,10 +1085,10 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo * * hardware if GS uses the legacy path. When NGG GS is used, the hardware can't know the number * of generated primitives and we have to increment it from the shader using a plain GDS atomic. */ - pool->uses_gds = (device->physical_device->emulate_ngg_gs_query_pipeline_stat && - (pool->pipeline_stats_mask & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT)) || - (device->physical_device->use_ngg && - pCreateInfo->queryType == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT); + pool->uses_gds = + (device->physical_device->emulate_ngg_gs_query_pipeline_stat && + (pool->pipeline_stats_mask & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT)) || + (device->physical_device->use_ngg && pCreateInfo->queryType == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT); switch (pCreateInfo->queryType) { case VK_QUERY_TYPE_OCCLUSION: @@ -1167,8 +1124,7 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo * } break; case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: { - result = radv_pc_init_query_pool(device->physical_device, pCreateInfo, - (struct radv_pc_query_pool *)pool); + result = radv_pc_init_query_pool(device->physical_device, pCreateInfo, (struct radv_pc_query_pool *)pool); if (result != VK_SUCCESS) { radv_destroy_query_pool(device, pAllocator, pool); @@ -1186,8 +1142,7 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo * pool->size += 4 * pCreateInfo->queryCount; result = device->ws->buffer_create(device->ws, pool->size, 64, RADEON_DOMAIN_GTT, - RADEON_FLAG_NO_INTERPROCESS_SHARING, - RADV_BO_PRIORITY_QUERY_POOL, 0, &pool->bo); + RADEON_FLAG_NO_INTERPROCESS_SHARING, RADV_BO_PRIORITY_QUERY_POOL, 0, &pool->bo); if (result != VK_SUCCESS) { radv_destroy_query_pool(device, pAllocator, pool); return vk_error(device, result); @@ -1225,9 +1180,8 @@ radv_DestroyQueryPool(VkDevice _device, VkQueryPool _pool, const VkAllocationCal } VKAPI_ATTR VkResult VKAPI_CALL -radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery, - uint32_t queryCount, size_t dataSize, void *pData, VkDeviceSize stride, - VkQueryResultFlags flags) +radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount, + size_t dataSize, void *pData, VkDeviceSize stride, VkQueryResultFlags flags) { RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); @@ -1288,8 +1242,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first do { start = p_atomic_read(src64 + 2 * i); end = p_atomic_read(src64 + 2 * i + 1); - } while ((!(start & (1ull << 63)) || !(end & (1ull << 63))) && - (flags & VK_QUERY_RESULT_WAIT_BIT)); + } while ((!(start & (1ull << 63)) || !(end & (1ull << 63))) && (flags & VK_QUERY_RESULT_WAIT_BIT)); if (!(start & (1ull << 63)) || !(end & (1ull << 63))) available = 0; @@ -1314,8 +1267,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first } case VK_QUERY_TYPE_PIPELINE_STATISTICS: { unsigned pipelinestat_block_size 
= radv_get_pipelinestat_query_size(device); - const uint32_t *avail_ptr = - (const uint32_t *)(pool->ptr + pool->availability_offset + 4 * query); + const uint32_t *avail_ptr = (const uint32_t *)(pool->ptr + pool->availability_offset + 4 * query); uint64_t ngg_gds_result = 0; do { @@ -1341,11 +1293,9 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first for (int i = 0; i < ARRAY_SIZE(pipeline_statistics_indices); ++i) { if (pool->pipeline_stats_mask & (1u << i)) { if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT)) { - *dst = stop[pipeline_statistics_indices[i]] - - start[pipeline_statistics_indices[i]]; + *dst = stop[pipeline_statistics_indices[i]] - start[pipeline_statistics_indices[i]]; - if (pool->uses_gds && - (1u << i) == VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT) { + if (pool->uses_gds && (1u << i) == VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT) { *dst += ngg_gds_result; } } @@ -1359,11 +1309,9 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first for (int i = 0; i < ARRAY_SIZE(pipeline_statistics_indices); ++i) { if (pool->pipeline_stats_mask & (1u << i)) { if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT)) { - *dst = stop[pipeline_statistics_indices[i]] - - start[pipeline_statistics_indices[i]]; + *dst = stop[pipeline_statistics_indices[i]] - start[pipeline_statistics_indices[i]]; - if (pool->uses_gds && - (1u << i) == VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT) { + if (pool->uses_gds && (1u << i) == VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT) { *dst += ngg_gds_result; } } @@ -1533,9 +1481,9 @@ radv_query_result_size(const struct radv_query_pool *pool, VkQueryResultFlags fl } VKAPI_ATTR void VKAPI_CALL -radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool, - uint32_t firstQuery, uint32_t queryCount, VkBuffer dstBuffer, - VkDeviceSize dstOffset, VkDeviceSize stride, VkQueryResultFlags flags) +radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, + uint32_t queryCount, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize stride, + VkQueryResultFlags flags) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); @@ -1570,10 +1518,9 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: - radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.occlusion_query_pipeline, - pool->bo, dst_buffer->bo, firstQuery * pool->stride, - dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, queryCount, - flags, 0, 0, false); + radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.occlusion_query_pipeline, pool->bo, + dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride, + dst_size, queryCount, flags, 0, 0, false); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: if (flags & VK_QUERY_RESULT_WAIT_BIT) { @@ -1588,11 +1535,10 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, avail_va, 1, 0xffffffff); } } - radv_query_shader( - cmd_buffer, &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline, - pool->bo, dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset, - pool->stride, stride, dst_size, queryCount, flags, pool->pipeline_stats_mask, - pool->availability_offset 
+ 4 * firstQuery, pool->uses_gds); + radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline, pool->bo, + dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride, + dst_size, queryCount, flags, pool->pipeline_stats_mask, + pool->availability_offset + 4 * firstQuery, pool->uses_gds); break; case VK_QUERY_TYPE_TIMESTAMP: case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR: @@ -1609,15 +1555,13 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo /* Wait on the high 32 bits of the timestamp in * case the low part is 0xffffffff. */ - radv_cp_wait_mem(cs, WAIT_REG_MEM_NOT_EQUAL, local_src_va + 4, - TIMESTAMP_NOT_READY >> 32, 0xffffffff); + radv_cp_wait_mem(cs, WAIT_REG_MEM_NOT_EQUAL, local_src_va + 4, TIMESTAMP_NOT_READY >> 32, 0xffffffff); } } - radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.timestamp_query_pipeline, - pool->bo, dst_buffer->bo, firstQuery * pool->stride, - dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, queryCount, - flags, 0, 0, false); + radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.timestamp_query_pipeline, pool->bo, + dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride, + dst_size, queryCount, flags, 0, 0, false); break; case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: if (flags & VK_QUERY_RESULT_WAIT_BIT) { @@ -1629,16 +1573,14 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo /* Wait on the upper word of all results. */ for (unsigned j = 0; j < 4; j++, src_va += 8) { - radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 0x80000000, - 0xffffffff); + radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 0x80000000, 0xffffffff); } } } - radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.tfb_query_pipeline, - pool->bo, dst_buffer->bo, firstQuery * pool->stride, - dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, queryCount, - flags, 0, 0, false); + radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.tfb_query_pipeline, pool->bo, dst_buffer->bo, + firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, + queryCount, flags, 0, 0, false); break; case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: if (flags & VK_QUERY_RESULT_WAIT_BIT) { @@ -1654,10 +1596,10 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo } } - radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pg_query_pipeline, - pool->bo, dst_buffer->bo, firstQuery * pool->stride, - dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, queryCount, - flags, 0, 0, pool->uses_gds && cmd_buffer->device->physical_device->rad_info.gfx_level < GFX11); + radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pg_query_pipeline, pool->bo, dst_buffer->bo, + firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, + queryCount, flags, 0, 0, + pool->uses_gds && cmd_buffer->device->physical_device->rad_info.gfx_level < GFX11); break; default: unreachable("trying to get results of unhandled query type"); @@ -1680,8 +1622,7 @@ query_clear_value(VkQueryType type) } VKAPI_ATTR void VKAPI_CALL -radv_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, - uint32_t queryCount) +radv_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool 
queryPool, uint32_t firstQuery, uint32_t queryCount) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); @@ -1694,15 +1635,13 @@ radv_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uin */ cmd_buffer->state.flush_bits |= cmd_buffer->active_query_flush_bits; - flush_bits |= radv_fill_buffer(cmd_buffer, NULL, pool->bo, - radv_buffer_get_va(pool->bo) + firstQuery * pool->stride, + flush_bits |= radv_fill_buffer(cmd_buffer, NULL, pool->bo, radv_buffer_get_va(pool->bo) + firstQuery * pool->stride, queryCount * pool->stride, value); if (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) { flush_bits |= radv_fill_buffer(cmd_buffer, NULL, pool->bo, - radv_buffer_get_va(pool->bo) + pool->availability_offset + firstQuery * 4, - queryCount * 4, 0); + radv_buffer_get_va(pool->bo) + pool->availability_offset + firstQuery * 4, queryCount * 4, 0); } if (flush_bits) { @@ -1713,8 +1652,7 @@ radv_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uin } VKAPI_ATTR void VKAPI_CALL -radv_ResetQueryPool(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery, - uint32_t queryCount) +radv_ResetQueryPool(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount) { RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); @@ -1771,8 +1709,7 @@ gfx10_copy_gds_query(struct radv_cmd_buffer *cmd_buffer, uint32_t gds_offset, ui si_emit_cache_flush(cmd_buffer); radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_GDS) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | - COPY_DATA_WR_CONFIRM); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_GDS) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM); radeon_emit(cs, gds_offset); radeon_emit(cs, 0); radeon_emit(cs, va); @@ -1780,8 +1717,8 @@ gfx10_copy_gds_query(struct radv_cmd_buffer *cmd_buffer, uint32_t gds_offset, ui } static void -emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va, - VkQueryType query_type, VkQueryControlFlags flags, uint32_t index) +emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va, VkQueryType query_type, + VkQueryControlFlags flags, uint32_t index) { struct radeon_cmdbuf *cs = cmd_buffer->cs; switch (query_type) { @@ -1799,8 +1736,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo cmd_buffer->state.dirty |= RADV_CMD_DIRTY_OCCLUSION_QUERY; } else { - if ((flags & VK_QUERY_CONTROL_PRECISE_BIT) && - !cmd_buffer->state.perfect_occlusion_queries_enabled) { + if ((flags & VK_QUERY_CONTROL_PRECISE_BIT) && !cmd_buffer->state.perfect_occlusion_queries_enabled) { /* This is not the first query, but this one * needs to enable precision, DB_COUNT_CONTROL * has to be updated accordingly. 
@@ -1928,8 +1864,8 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo } static void -emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va, - uint64_t avail_va, VkQueryType query_type, uint32_t index) +emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va, uint64_t avail_va, + VkQueryType query_type, uint32_t index) { struct radeon_cmdbuf *cs = cmd_buffer->cs; switch (query_type) { @@ -1979,9 +1915,8 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, radeon_emit(cs, va >> 32); si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, - radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, - 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, avail_va, 1, - cmd_buffer->gfx9_eop_bug_va); + radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, + EOP_DATA_SEL_VALUE_32BIT, avail_va, 1, cmd_buffer->gfx9_eop_bug_va); if (pool->uses_gds) { va += pipelinestat_block_size + 8; @@ -2059,12 +1994,10 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, unreachable("ending unhandled query type"); } - cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH | - RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | - RADV_CMD_FLAG_INV_VCACHE; + cmd_buffer->active_query_flush_bits |= + RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE; if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) { - cmd_buffer->active_query_flush_bits |= - RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB; + cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB; } } @@ -2087,15 +2020,13 @@ radv_CmdBeginQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPoo } VKAPI_ATTR void VKAPI_CALL -radv_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, - VkQueryControlFlags flags) +radv_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, VkQueryControlFlags flags) { radv_CmdBeginQueryIndexedEXT(commandBuffer, queryPool, query, flags, 0); } VKAPI_ATTR void VKAPI_CALL -radv_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, - uint32_t index) +radv_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, uint32_t index) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); @@ -2133,8 +2064,8 @@ radv_CmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t } VKAPI_ATTR void VKAPI_CALL -radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, - VkQueryPool queryPool, uint32_t query) +radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, VkQueryPool queryPool, + uint32_t query) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); @@ -2149,7 +2080,7 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta /* Make sure previously launched waves have finished */ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH; } - + si_emit_cache_flush(cmd_buffer); int num_queries = 1; @@ -2161,37 +2092,33 @@ radv_CmdWriteTimestamp2(VkCommandBuffer 
commandBuffer, VkPipelineStageFlags2 sta for (unsigned i = 0; i < num_queries; i++) { if (stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT) { radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM | - COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) | COPY_DATA_DST_SEL(V_370_MEM)); + radeon_emit(cs, COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM | COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) | + COPY_DATA_DST_SEL(V_370_MEM)); radeon_emit(cs, 0); radeon_emit(cs, 0); radeon_emit(cs, query_va); radeon_emit(cs, query_va >> 32); } else { - si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, - mec, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, - EOP_DATA_SEL_TIMESTAMP, query_va, 0, + si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, mec, + V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_TIMESTAMP, query_va, 0, cmd_buffer->gfx9_eop_bug_va); } query_va += pool->stride; } - cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH | - RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | - RADV_CMD_FLAG_INV_VCACHE; + cmd_buffer->active_query_flush_bits |= + RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE; if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) { - cmd_buffer->active_query_flush_bits |= - RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB; + cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB; } assert(cmd_buffer->cs->cdw <= cdw_max); } VKAPI_ATTR void VKAPI_CALL -radv_CmdWriteAccelerationStructuresPropertiesKHR( - VkCommandBuffer commandBuffer, uint32_t accelerationStructureCount, - const VkAccelerationStructureKHR *pAccelerationStructures, VkQueryType queryType, - VkQueryPool queryPool, uint32_t firstQuery) +radv_CmdWriteAccelerationStructuresPropertiesKHR(VkCommandBuffer commandBuffer, uint32_t accelerationStructureCount, + const VkAccelerationStructureKHR *pAccelerationStructures, + VkQueryType queryType, VkQueryPool queryPool, uint32_t firstQuery) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); @@ -2203,8 +2130,7 @@ radv_CmdWriteAccelerationStructuresPropertiesKHR( si_emit_cache_flush(cmd_buffer); - ASSERTED unsigned cdw_max = - radeon_check_space(cmd_buffer->device->ws, cs, 6 * accelerationStructureCount); + ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 6 * accelerationStructureCount); for (uint32_t i = 0; i < accelerationStructureCount; ++i) { RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, pAccelerationStructures[i]); diff --git a/src/amd/vulkan/radv_queue.c b/src/amd/vulkan/radv_queue.c index d9c7b52..615470d 100644 --- a/src/amd/vulkan/radv_queue.c +++ b/src/amd/vulkan/radv_queue.c @@ -28,8 +28,8 @@ #include "radv_cs.h" #include "radv_debug.h" #include "radv_private.h" -#include "vk_sync.h" #include "vk_semaphore.h" +#include "vk_sync.h" enum radeon_ctx_priority radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoKHR *pObj) @@ -89,8 +89,8 @@ radv_sparse_buffer_bind_memory(struct radv_device *device, const VkSparseBufferM } } if (size) { - result = device->ws->buffer_virtual_bind(device->ws, buffer->bo, resourceOffset, size, - mem ? mem->bo : NULL, memoryOffset); + result = device->ws->buffer_virtual_bind(device->ws, buffer->bo, resourceOffset, size, mem ? 
mem->bo : NULL, + memoryOffset); if (result != VK_SUCCESS) return result; @@ -105,8 +105,8 @@ radv_sparse_buffer_bind_memory(struct radv_device *device, const VkSparseBufferM memoryOffset = bind->pBinds[i].memoryOffset; } if (size) { - result = device->ws->buffer_virtual_bind(device->ws, buffer->bo, resourceOffset, size, - mem ? mem->bo : NULL, memoryOffset); + result = device->ws->buffer_virtual_bind(device->ws, buffer->bo, resourceOffset, size, mem ? mem->bo : NULL, + memoryOffset); if (mem) radv_rmv_log_sparse_add_residency(device, buffer->bo, memoryOffset); @@ -118,8 +118,7 @@ radv_sparse_buffer_bind_memory(struct radv_device *device, const VkSparseBufferM } static VkResult -radv_sparse_image_opaque_bind_memory(struct radv_device *device, - const VkSparseImageOpaqueMemoryBindInfo *bind) +radv_sparse_image_opaque_bind_memory(struct radv_device *device, const VkSparseImageOpaqueMemoryBindInfo *bind) { RADV_FROM_HANDLE(radv_image, image, bind->image); VkResult result; @@ -130,18 +129,16 @@ radv_sparse_image_opaque_bind_memory(struct radv_device *device, if (bind->pBinds[i].memory != VK_NULL_HANDLE) mem = radv_device_memory_from_handle(bind->pBinds[i].memory); - result = device->ws->buffer_virtual_bind(device->ws, image->bindings[0].bo, - bind->pBinds[i].resourceOffset, bind->pBinds[i].size, - mem ? mem->bo : NULL, bind->pBinds[i].memoryOffset); + result = + device->ws->buffer_virtual_bind(device->ws, image->bindings[0].bo, bind->pBinds[i].resourceOffset, + bind->pBinds[i].size, mem ? mem->bo : NULL, bind->pBinds[i].memoryOffset); if (result != VK_SUCCESS) return result; if (bind->pBinds[i].memory) - radv_rmv_log_sparse_add_residency(device, image->bindings[0].bo, - bind->pBinds[i].resourceOffset); + radv_rmv_log_sparse_add_residency(device, image->bindings[0].bo, bind->pBinds[i].resourceOffset); else - radv_rmv_log_sparse_remove_residency(device, image->bindings[0].bo, - bind->pBinds[i].resourceOffset); + radv_rmv_log_sparse_remove_residency(device, image->bindings[0].bo, bind->pBinds[i].resourceOffset); } return VK_SUCCESS; @@ -164,10 +161,8 @@ radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMem const uint32_t level = bind->pBinds[i].subresource.mipLevel; VkExtent3D bind_extent = bind->pBinds[i].extent; - bind_extent.width = - DIV_ROUND_UP(bind_extent.width, vk_format_get_blockwidth(image->vk.format)); - bind_extent.height = - DIV_ROUND_UP(bind_extent.height, vk_format_get_blockheight(image->vk.format)); + bind_extent.width = DIV_ROUND_UP(bind_extent.width, vk_format_get_blockwidth(image->vk.format)); + bind_extent.height = DIV_ROUND_UP(bind_extent.height, vk_format_get_blockheight(image->vk.format)); VkOffset3D bind_offset = bind->pBinds[i].offset; bind_offset.x /= vk_format_get_blockwidth(image->vk.format); @@ -186,24 +181,23 @@ radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMem pitch = surface->u.legacy.level[level].nblk_x; } - offset += bind_offset.z * depth_pitch + - ((uint64_t)bind_offset.y * pitch * surface->prt_tile_depth + - (uint64_t)bind_offset.x * surface->prt_tile_height * surface->prt_tile_depth) * - bs; + offset += + bind_offset.z * depth_pitch + ((uint64_t)bind_offset.y * pitch * surface->prt_tile_depth + + (uint64_t)bind_offset.x * surface->prt_tile_height * surface->prt_tile_depth) * + bs; uint32_t aligned_extent_width = ALIGN(bind_extent.width, surface->prt_tile_width); uint32_t aligned_extent_height = ALIGN(bind_extent.height, surface->prt_tile_height); uint32_t aligned_extent_depth = ALIGN(bind_extent.depth, 
surface->prt_tile_depth); - bool whole_subres = - (bind_extent.height <= surface->prt_tile_height || aligned_extent_width == pitch) && - (bind_extent.depth <= surface->prt_tile_depth || - (uint64_t)aligned_extent_width * aligned_extent_height * bs == depth_pitch); + bool whole_subres = (bind_extent.height <= surface->prt_tile_height || aligned_extent_width == pitch) && + (bind_extent.depth <= surface->prt_tile_depth || + (uint64_t)aligned_extent_width * aligned_extent_height * bs == depth_pitch); if (whole_subres) { uint64_t size = (uint64_t)aligned_extent_width * aligned_extent_height * aligned_extent_depth * bs; - result = device->ws->buffer_virtual_bind(device->ws, image->bindings[0].bo, offset, size, - mem ? mem->bo : NULL, mem_offset); + result = device->ws->buffer_virtual_bind(device->ws, image->bindings[0].bo, offset, size, mem ? mem->bo : NULL, + mem_offset); if (result != VK_SUCCESS) return result; @@ -221,8 +215,8 @@ radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMem z += surface->prt_tile_depth, offset += depth_pitch * surface->prt_tile_depth) { for (unsigned y = 0; y < bind_extent.height; y += surface->prt_tile_height) { result = device->ws->buffer_virtual_bind( - device->ws, image->bindings[0].bo, offset + (uint64_t)img_y_increment * y, size, - mem ? mem->bo : NULL, mem_offset + (uint64_t)mem_y_increment * y + mem_z_increment * z); + device->ws, image->bindings[0].bo, offset + (uint64_t)img_y_increment * y, size, mem ? mem->bo : NULL, + mem_offset + (uint64_t)mem_y_increment * y + mem_z_increment * z); if (result != VK_SUCCESS) return result; @@ -248,8 +242,7 @@ radv_queue_submit_bind_sparse_memory(struct radv_device *device, struct vk_queue } for (uint32_t i = 0; i < submission->image_opaque_bind_count; ++i) { - VkResult result = - radv_sparse_image_opaque_bind_memory(device, submission->image_opaque_binds + i); + VkResult result = radv_sparse_image_opaque_bind_memory(device, submission->image_opaque_binds + i); if (result != VK_SUCCESS) return result; } @@ -277,13 +270,11 @@ radv_queue_submit_empty(struct radv_queue *queue, struct vk_queue_submit *submis } static void -radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sample_positions, - uint32_t esgs_ring_size, struct radeon_winsys_bo *esgs_ring_bo, - uint32_t gsvs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo, - struct radeon_winsys_bo *tess_rings_bo, - struct radeon_winsys_bo *task_rings_bo, - struct radeon_winsys_bo *mesh_scratch_ring_bo, uint32_t attr_ring_size, - struct radeon_winsys_bo *attr_ring_bo) +radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sample_positions, uint32_t esgs_ring_size, + struct radeon_winsys_bo *esgs_ring_bo, uint32_t gsvs_ring_size, + struct radeon_winsys_bo *gsvs_ring_bo, struct radeon_winsys_bo *tess_rings_bo, + struct radeon_winsys_bo *task_rings_bo, struct radeon_winsys_bo *mesh_scratch_ring_bo, + uint32_t attr_ring_size, struct radeon_winsys_bo *attr_ring_bo) { uint32_t *desc = &map[4]; @@ -305,15 +296,14 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sampl desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX6(1); if (device->physical_device->rad_info.gfx_level >= GFX11) { - desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); + desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); } else if (device->physical_device->rad_info.gfx_level >= GFX10) { desc[3] |= 
S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); } else if (device->physical_device->rad_info.gfx_level >= GFX8) { /* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */ - desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(0) | - S_008F0C_ELEMENT_SIZE(1); + desc[3] |= + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(0) | S_008F0C_ELEMENT_SIZE(1); } else { desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1); @@ -329,14 +319,13 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sampl S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); if (device->physical_device->rad_info.gfx_level >= GFX11) { - desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); + desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); } else if (device->physical_device->rad_info.gfx_level >= GFX10) { desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); } else { - desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + desc[7] |= + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); } } @@ -355,14 +344,13 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sampl S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); if (device->physical_device->rad_info.gfx_level >= GFX11) { - desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); + desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); } else if (device->physical_device->rad_info.gfx_level >= GFX10) { desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); } else { - desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + desc[3] |= + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); } /* stride gsvs_itemsize, num records 64 @@ -381,15 +369,14 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sampl desc[5] |= S_008F04_SWIZZLE_ENABLE_GFX6(1); if (device->physical_device->rad_info.gfx_level >= GFX11) { - desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); + desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); } else if (device->physical_device->rad_info.gfx_level >= GFX10) { desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); } else if (device->physical_device->rad_info.gfx_level >= GFX8) { /* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */ - desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(0) | - S_008F0C_ELEMENT_SIZE(1); + desc[7] |= + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(0) | 
S_008F0C_ELEMENT_SIZE(1); } else { desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1); @@ -409,14 +396,13 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sampl S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); if (device->physical_device->rad_info.gfx_level >= GFX11) { - desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); + desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); } else if (device->physical_device->rad_info.gfx_level >= GFX10) { - desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1); + desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | + S_008F0C_RESOURCE_LEVEL(1); } else { - desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + desc[3] |= + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); } desc[4] = tess_offchip_va; @@ -426,14 +412,13 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sampl S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); if (device->physical_device->rad_info.gfx_level >= GFX11) { - desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); + desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); } else if (device->physical_device->rad_info.gfx_level >= GFX10) { - desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1); + desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | + S_008F0C_RESOURCE_LEVEL(1); } else { - desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + desc[7] |= + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); } } @@ -442,8 +427,7 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sampl if (task_rings_bo) { uint64_t task_va = radv_buffer_get_va(task_rings_bo); uint64_t task_draw_ring_va = task_va + device->physical_device->task_info.draw_ring_offset; - uint64_t task_payload_ring_va = - task_va + device->physical_device->task_info.payload_ring_offset; + uint64_t task_payload_ring_va = task_va + device->physical_device->task_info.payload_ring_offset; desc[0] = task_draw_ring_va; desc[1] = S_008F04_BASE_ADDRESS_HI(task_draw_ring_va >> 32); @@ -452,12 +436,11 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sampl S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); if (device->physical_device->rad_info.gfx_level >= GFX11) { - desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); + desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); } else { assert(device->physical_device->rad_info.gfx_level >= GFX10_3); - desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | 
S_008F0C_RESOURCE_LEVEL(1); + desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | + S_008F0C_RESOURCE_LEVEL(1); } desc[4] = task_payload_ring_va; @@ -467,12 +450,11 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sampl S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); if (device->physical_device->rad_info.gfx_level >= GFX11) { - desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); + desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); } else { assert(device->physical_device->rad_info.gfx_level >= GFX10_3); - desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); + desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | + S_008F0C_RESOURCE_LEVEL(1); } } @@ -488,12 +470,11 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sampl S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); if (device->physical_device->rad_info.gfx_level >= GFX11) { - desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); + desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); } else { assert(device->physical_device->rad_info.gfx_level >= GFX10_3); - desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); + desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | + S_008F0C_RESOURCE_LEVEL(1); } } @@ -509,8 +490,7 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sampl desc[2] = attr_ring_size; desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_32_32_32_FLOAT) | - S_008F0C_INDEX_STRIDE(2) /* 32 elements */; + S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_32_32_32_FLOAT) | S_008F0C_INDEX_STRIDE(2) /* 32 elements */; } desc += 4; @@ -528,9 +508,8 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sampl } static void -radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, - struct radeon_winsys_bo *esgs_ring_bo, uint32_t esgs_ring_size, - struct radeon_winsys_bo *gsvs_ring_bo, uint32_t gsvs_ring_size) +radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *esgs_ring_bo, + uint32_t esgs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo, uint32_t gsvs_ring_size) { if (!esgs_ring_bo && !gsvs_ring_bo) return; @@ -553,8 +532,7 @@ radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, } static void -radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs, - struct radeon_winsys_bo *tess_rings_bo) +radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *tess_rings_bo) { uint64_t tf_va; uint32_t tf_ring_size; @@ -581,19 +559,16 @@ radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs, radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, 
S_030944_BASE_HI(tf_va >> 40)); } - radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, - device->physical_device->hs.hs_offchip_param); + radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, device->physical_device->hs.hs_offchip_param); } else { radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size)); radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, tf_va >> 8); - radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, - device->physical_device->hs.hs_offchip_param); + radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, device->physical_device->hs.hs_offchip_param); } } static VkResult -radv_initialise_task_control_buffer(struct radv_device *device, - struct radeon_winsys_bo *task_rings_bo) +radv_initialise_task_control_buffer(struct radv_device *device, struct radeon_winsys_bo *task_rings_bo) { uint32_t *ptr = (uint32_t *)device->ws->buffer_map(task_rings_bo); if (!ptr) @@ -624,8 +599,8 @@ radv_initialise_task_control_buffer(struct radv_device *device, } static void -radv_emit_task_rings(struct radv_device *device, struct radeon_cmdbuf *cs, - struct radeon_winsys_bo *task_rings_bo, bool compute) +radv_emit_task_rings(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *task_rings_bo, + bool compute) { if (!task_rings_bo) return; @@ -643,8 +618,7 @@ radv_emit_task_rings(struct radv_device *device, struct radeon_cmdbuf *cs, } static void -radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, - uint32_t size_per_wave, uint32_t waves, +radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves, struct radeon_winsys_bo *scratch_bo) { const struct radeon_info *info = &device->physical_device->rad_info; @@ -665,15 +639,13 @@ radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, radeon_emit(cs, va >> 8); /* SPI_GFX_SCRATCH_BASE_LO */ radeon_emit(cs, va >> 40); /* SPI_GFX_SCRATCH_BASE_HI */ } else { - radeon_set_context_reg( - cs, R_0286E8_SPI_TMPRING_SIZE, - S_0286E8_WAVES(waves) | S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 1024))); + radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE, + S_0286E8_WAVES(waves) | S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 1024))); } } static void -radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, - uint32_t size_per_wave, uint32_t waves, +radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves, struct radeon_winsys_bo *compute_scratch_bo) { const struct radeon_info *info = &device->physical_device->rad_info; @@ -707,8 +679,7 @@ radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, radeon_set_sh_reg( cs, R_00B860_COMPUTE_TMPRING_SIZE, - S_00B860_WAVES(waves) | - S_00B860_WAVESIZE(round_up_u32(size_per_wave, info->gfx_level >= GFX11 ? 256 : 1024))); + S_00B860_WAVES(waves) | S_00B860_WAVESIZE(round_up_u32(size_per_wave, info->gfx_level >= GFX11 ? 
256 : 1024))); } static void @@ -749,16 +720,14 @@ radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmd } } else if (device->physical_device->rad_info.gfx_level >= GFX10) { uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0, - R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, - R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS}; + R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS}; for (int i = 0; i < ARRAY_SIZE(regs); ++i) { radv_emit_shader_pointer(device, cs, regs[i], va, true); } } else if (device->physical_device->rad_info.gfx_level == GFX9) { uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0, - R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, - R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS}; + R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS}; for (int i = 0; i < ARRAY_SIZE(regs); ++i) { radv_emit_shader_pointer(device, cs, regs[i], va, true); @@ -775,8 +744,8 @@ radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmd } static void -radv_emit_attribute_ring(struct radv_device *device, struct radeon_cmdbuf *cs, - struct radeon_winsys_bo *attr_ring_bo, uint32_t attr_ring_size) +radv_emit_attribute_ring(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *attr_ring_bo, + uint32_t attr_ring_size) { const struct radv_physical_device *pdevice = device->physical_device; uint64_t va; @@ -795,8 +764,7 @@ radv_emit_attribute_ring(struct radv_device *device, struct radeon_cmdbuf *cs, * bottom-of-pipe EOP event, but increment the PWS counter instead of writing memory. */ radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 6, 0)); - radeon_emit(cs, S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_490_EVENT_INDEX(5) | - S_490_PWS_ENABLE(1)); + radeon_emit(cs, S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_490_EVENT_INDEX(5) | S_490_PWS_ENABLE(1)); radeon_emit(cs, 0); /* DST_SEL, INT_SEL, DATA_SEL */ radeon_emit(cs, 0); /* ADDRESS_LO */ radeon_emit(cs, 0); /* ADDRESS_HI */ @@ -806,8 +774,8 @@ radv_emit_attribute_ring(struct radv_device *device, struct radeon_cmdbuf *cs, /* Wait for the PWS counter. */ radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 6, 0)); - radeon_emit(cs, S_580_PWS_STAGE_SEL(V_580_CP_ME) | S_580_PWS_COUNTER_SEL(V_580_TS_SELECT) | - S_580_PWS_ENA2(1) | S_580_PWS_COUNT(0)); + radeon_emit(cs, S_580_PWS_STAGE_SEL(V_580_CP_ME) | S_580_PWS_COUNTER_SEL(V_580_TS_SELECT) | S_580_PWS_ENA2(1) | + S_580_PWS_COUNT(0)); radeon_emit(cs, 0xffffffff); /* GCR_SIZE */ radeon_emit(cs, 0x01ffffff); /* GCR_SIZE_HI */ radeon_emit(cs, 0); /* GCR_BASE_LO */ @@ -817,10 +785,9 @@ radv_emit_attribute_ring(struct radv_device *device, struct radeon_cmdbuf *cs, /* The PS will read inputs from this address. 
*/ radeon_set_uconfig_reg(cs, R_031118_SPI_ATTRIBUTE_RING_BASE, va >> 16); - radeon_set_uconfig_reg( - cs, R_03111C_SPI_ATTRIBUTE_RING_SIZE, - S_03111C_MEM_SIZE(((attr_ring_size / pdevice->rad_info.max_se) >> 16) - 1) | - S_03111C_BIG_PAGE(pdevice->rad_info.discardable_allows_big_page) | S_03111C_L1_POLICY(1)); + radeon_set_uconfig_reg(cs, R_03111C_SPI_ATTRIBUTE_RING_SIZE, + S_03111C_MEM_SIZE(((attr_ring_size / pdevice->rad_info.max_se) >> 16) - 1) | + S_03111C_BIG_PAGE(pdevice->rad_info.discardable_allows_big_page) | S_03111C_L1_POLICY(1)); } static void @@ -868,19 +835,17 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi const bool add_sample_positions = !queue->ring_info.sample_positions && needs->sample_positions; const uint32_t scratch_size = needs->scratch_size_per_wave * needs->scratch_waves; - const uint32_t queue_scratch_size = - queue->ring_info.scratch_size_per_wave * queue->ring_info.scratch_waves; + const uint32_t queue_scratch_size = queue->ring_info.scratch_size_per_wave * queue->ring_info.scratch_waves; if (scratch_size > queue_scratch_size) { - result = ws->buffer_create(ws, scratch_size, 4096, RADEON_DOMAIN_VRAM, ring_bo_flags, - RADV_BO_PRIORITY_SCRATCH, 0, &scratch_bo); + result = ws->buffer_create(ws, scratch_size, 4096, RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, + &scratch_bo); if (result != VK_SUCCESS) goto fail; radv_rmv_log_command_buffer_bo_create(device, scratch_bo, 0, 0, scratch_size); } - const uint32_t compute_scratch_size = - needs->compute_scratch_size_per_wave * needs->compute_scratch_waves; + const uint32_t compute_scratch_size = needs->compute_scratch_size_per_wave * needs->compute_scratch_waves; const uint32_t compute_queue_scratch_size = queue->ring_info.compute_scratch_size_per_wave * queue->ring_info.compute_scratch_waves; if (compute_scratch_size > compute_queue_scratch_size) { @@ -908,10 +873,10 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi } if (!queue->ring_info.tess_rings && needs->tess_rings) { - uint64_t tess_rings_size = device->physical_device->hs.tess_offchip_ring_offset + - device->physical_device->hs.tess_offchip_ring_size; - result = ws->buffer_create(ws, tess_rings_size, 256, RADEON_DOMAIN_VRAM, ring_bo_flags, - RADV_BO_PRIORITY_SCRATCH, 0, &tess_rings_bo); + uint64_t tess_rings_size = + device->physical_device->hs.tess_offchip_ring_offset + device->physical_device->hs.tess_offchip_ring_size; + result = ws->buffer_create(ws, tess_rings_size, 256, RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, + 0, &tess_rings_bo); if (result != VK_SUCCESS) goto fail; radv_rmv_log_command_buffer_bo_create(device, tess_rings_bo, 0, 0, tess_rings_size); @@ -926,9 +891,8 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi uint32_t task_rings_bo_flags = RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM; - result = ws->buffer_create(ws, device->physical_device->task_info.bo_size_bytes, 256, - RADEON_DOMAIN_VRAM, task_rings_bo_flags, RADV_BO_PRIORITY_SCRATCH, - 0, &task_rings_bo); + result = ws->buffer_create(ws, device->physical_device->task_info.bo_size_bytes, 256, RADEON_DOMAIN_VRAM, + task_rings_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &task_rings_bo); if (result != VK_SUCCESS) goto fail; radv_rmv_log_command_buffer_bo_create(device, task_rings_bo, 0, 0, @@ -941,23 +905,20 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi if 
(!queue->ring_info.mesh_scratch_ring && needs->mesh_scratch_ring) { assert(device->physical_device->rad_info.gfx_level >= GFX10_3); - result = ws->buffer_create(ws, RADV_MESH_SCRATCH_NUM_ENTRIES * RADV_MESH_SCRATCH_ENTRY_BYTES, - 256, RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, - 0, &mesh_scratch_ring_bo); + result = ws->buffer_create(ws, RADV_MESH_SCRATCH_NUM_ENTRIES * RADV_MESH_SCRATCH_ENTRY_BYTES, 256, + RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &mesh_scratch_ring_bo); if (result != VK_SUCCESS) goto fail; - radv_rmv_log_command_buffer_bo_create( - device, mesh_scratch_ring_bo, 0, 0, - RADV_MESH_SCRATCH_NUM_ENTRIES * RADV_MESH_SCRATCH_ENTRY_BYTES); + radv_rmv_log_command_buffer_bo_create(device, mesh_scratch_ring_bo, 0, 0, + RADV_MESH_SCRATCH_NUM_ENTRIES * RADV_MESH_SCRATCH_ENTRY_BYTES); } if (needs->attr_ring_size > queue->ring_info.attr_ring_size) { assert(device->physical_device->rad_info.gfx_level >= GFX11); - result = ws->buffer_create(ws, needs->attr_ring_size, 2 * 1024 * 1024 /* 2MiB */, - RADEON_DOMAIN_VRAM, - RADEON_FLAG_32BIT | RADEON_FLAG_DISCARDABLE | ring_bo_flags, - RADV_BO_PRIORITY_SCRATCH, 0, &attr_ring_bo); + result = ws->buffer_create(ws, needs->attr_ring_size, 2 * 1024 * 1024 /* 2MiB */, RADEON_DOMAIN_VRAM, + RADEON_FLAG_32BIT | RADEON_FLAG_DISCARDABLE | ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, + 0, &attr_ring_bo); if (result != VK_SUCCESS) goto fail; radv_rmv_log_command_buffer_bo_create(device, attr_ring_bo, 0, 0, needs->attr_ring_size); @@ -969,8 +930,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi /* 4 streamout GDS counters. * We need 256B (64 dw) of GDS, otherwise streamout hangs. */ - result = ws->buffer_create(ws, 256, 4, RADEON_DOMAIN_GDS, ring_bo_flags, - RADV_BO_PRIORITY_SCRATCH, 0, &gds_bo); + result = ws->buffer_create(ws, 256, 4, RADEON_DOMAIN_GDS, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &gds_bo); if (result != VK_SUCCESS) goto fail; @@ -985,8 +945,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi if (!queue->ring_info.gds_oa && needs->gds_oa) { assert(device->physical_device->rad_info.gfx_level >= GFX10); - result = ws->buffer_create(ws, 4, 1, RADEON_DOMAIN_OA, ring_bo_flags, - RADV_BO_PRIORITY_SCRATCH, 0, &gds_oa_bo); + result = ws->buffer_create(ws, 4, 1, RADEON_DOMAIN_OA, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &gds_oa_bo); if (result != VK_SUCCESS) goto fail; @@ -1004,15 +963,14 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi * when it uses the task shader rings. The task rings BO is shared between the * GFX and compute queues and already initialized here. 
*/ - if ((queue->qf == RADV_QUEUE_COMPUTE && !descriptor_bo && task_rings_bo) || - scratch_bo != queue->scratch_bo || esgs_ring_bo != queue->esgs_ring_bo || - gsvs_ring_bo != queue->gsvs_ring_bo || tess_rings_bo != queue->tess_rings_bo || - task_rings_bo != queue->task_rings_bo || + if ((queue->qf == RADV_QUEUE_COMPUTE && !descriptor_bo && task_rings_bo) || scratch_bo != queue->scratch_bo || + esgs_ring_bo != queue->esgs_ring_bo || gsvs_ring_bo != queue->gsvs_ring_bo || + tess_rings_bo != queue->tess_rings_bo || task_rings_bo != queue->task_rings_bo || mesh_scratch_ring_bo != queue->mesh_scratch_ring_bo || attr_ring_bo != queue->attr_ring_bo || add_sample_positions) { uint32_t size = 0; - if (gsvs_ring_bo || esgs_ring_bo || tess_rings_bo || task_rings_bo || mesh_scratch_ring_bo || - attr_ring_bo || add_sample_positions) { + if (gsvs_ring_bo || esgs_ring_bo || tess_rings_bo || task_rings_bo || mesh_scratch_ring_bo || attr_ring_bo || + add_sample_positions) { size = 176; /* 2 dword + 2 padding + 4 dword * 10 */ if (add_sample_positions) size += 128; /* 64+32+16+8 = 120 bytes */ @@ -1020,10 +978,9 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi size = 8; /* 2 dword */ } - result = ws->buffer_create( - ws, size, 4096, RADEON_DOMAIN_VRAM, - RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY, - RADV_BO_PRIORITY_DESCRIPTOR, 0, &descriptor_bo); + result = ws->buffer_create(ws, size, 4096, RADEON_DOMAIN_VRAM, + RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY, + RADV_BO_PRIORITY_DESCRIPTOR, 0, &descriptor_bo); if (result != VK_SUCCESS) goto fail; } @@ -1046,12 +1003,11 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi map[1] = rsrc1; } - if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || task_rings_bo || mesh_scratch_ring_bo || - attr_ring_bo || add_sample_positions) - radv_fill_shader_rings(device, map, add_sample_positions, needs->esgs_ring_size, - esgs_ring_bo, needs->gsvs_ring_size, gsvs_ring_bo, tess_rings_bo, - task_rings_bo, mesh_scratch_ring_bo, needs->attr_ring_size, - attr_ring_bo); + if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || task_rings_bo || mesh_scratch_ring_bo || attr_ring_bo || + add_sample_positions) + radv_fill_shader_rings(device, map, add_sample_positions, needs->esgs_ring_size, esgs_ring_bo, + needs->gsvs_ring_size, gsvs_ring_bo, tess_rings_bo, task_rings_bo, mesh_scratch_ring_bo, + needs->attr_ring_size, attr_ring_bo); ws->buffer_unmap(descriptor_bo); } @@ -1086,16 +1042,14 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); } - radv_emit_gs_ring_sizes(device, cs, esgs_ring_bo, needs->esgs_ring_size, gsvs_ring_bo, - needs->gsvs_ring_size); + radv_emit_gs_ring_sizes(device, cs, esgs_ring_bo, needs->esgs_ring_size, gsvs_ring_bo, needs->gsvs_ring_size); radv_emit_tess_factor_ring(device, cs, tess_rings_bo); radv_emit_task_rings(device, cs, task_rings_bo, false); radv_emit_attribute_ring(device, cs, attr_ring_bo, needs->attr_ring_size); radv_emit_graphics_shader_pointers(device, cs, descriptor_bo); - radv_emit_compute_scratch(device, cs, needs->compute_scratch_size_per_wave, - needs->compute_scratch_waves, compute_scratch_bo); - radv_emit_graphics_scratch(device, cs, needs->scratch_size_per_wave, needs->scratch_waves, - scratch_bo); + radv_emit_compute_scratch(device, cs, needs->compute_scratch_size_per_wave, 
needs->compute_scratch_waves, + compute_scratch_bo); + radv_emit_graphics_scratch(device, cs, needs->scratch_size_per_wave, needs->scratch_waves, scratch_bo); break; case RADV_QUEUE_COMPUTE: radv_init_compute_state(cs, device); @@ -1107,8 +1061,8 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi radv_emit_task_rings(device, cs, task_rings_bo, true); radv_emit_compute_shader_pointers(device, cs, descriptor_bo); - radv_emit_compute_scratch(device, cs, needs->compute_scratch_size_per_wave, - needs->compute_scratch_waves, compute_scratch_bo); + radv_emit_compute_scratch(device, cs, needs->compute_scratch_size_per_wave, needs->compute_scratch_waves, + compute_scratch_bo); break; default: break; @@ -1129,8 +1083,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH; } - si_cs_emit_cache_flush(ws, cs, gfx_level, NULL, 0, is_mec, flush_bits, &sqtt_flush_bits, - 0); + si_cs_emit_cache_flush(ws, cs, gfx_level, NULL, 0, is_mec, flush_bits, &sqtt_flush_bits, 0); } result = ws->cs_finalize(cs); @@ -1231,8 +1184,8 @@ fail: static VkResult radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device, - struct vk_command_buffer *const *cmd_buffers, uint32_t cmd_buffer_count, - bool *use_perf_counters, bool *has_follower) + struct vk_command_buffer *const *cmd_buffers, uint32_t cmd_buffer_count, bool *use_perf_counters, + bool *has_follower) { if (queue->qf != RADV_QUEUE_GENERAL && queue->qf != RADV_QUEUE_COMPUTE) return VK_SUCCESS; @@ -1250,13 +1203,11 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device for (uint32_t j = 0; j < cmd_buffer_count; j++) { struct radv_cmd_buffer *cmd_buffer = container_of(cmd_buffers[j], struct radv_cmd_buffer, vk); - needs.scratch_size_per_wave = - MAX2(needs.scratch_size_per_wave, cmd_buffer->scratch_size_per_wave_needed); + needs.scratch_size_per_wave = MAX2(needs.scratch_size_per_wave, cmd_buffer->scratch_size_per_wave_needed); needs.scratch_waves = MAX2(needs.scratch_waves, cmd_buffer->scratch_waves_wanted); - needs.compute_scratch_size_per_wave = MAX2(needs.compute_scratch_size_per_wave, - cmd_buffer->compute_scratch_size_per_wave_needed); - needs.compute_scratch_waves = - MAX2(needs.compute_scratch_waves, cmd_buffer->compute_scratch_waves_wanted); + needs.compute_scratch_size_per_wave = + MAX2(needs.compute_scratch_size_per_wave, cmd_buffer->compute_scratch_size_per_wave_needed); + needs.compute_scratch_waves = MAX2(needs.compute_scratch_waves, cmd_buffer->compute_scratch_waves_wanted); needs.esgs_ring_size = MAX2(needs.esgs_ring_size, cmd_buffer->esgs_ring_size_needed); needs.gsvs_ring_size = MAX2(needs.gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed); needs.tess_rings |= cmd_buffer->tess_rings_needed; @@ -1270,36 +1221,32 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device } /* Sanitize scratch size information. */ - needs.scratch_waves = needs.scratch_size_per_wave - ? MIN2(needs.scratch_waves, UINT32_MAX / needs.scratch_size_per_wave) - : 0; + needs.scratch_waves = + needs.scratch_size_per_wave ? MIN2(needs.scratch_waves, UINT32_MAX / needs.scratch_size_per_wave) : 0; needs.compute_scratch_waves = needs.compute_scratch_size_per_wave ? 
MIN2(needs.compute_scratch_waves, UINT32_MAX / needs.compute_scratch_size_per_wave) : 0; if (device->physical_device->rad_info.gfx_level >= GFX11 && queue->qf == RADV_QUEUE_GENERAL) { - needs.attr_ring_size = device->physical_device->rad_info.attribute_ring_size_per_se * - device->physical_device->rad_info.max_se; + needs.attr_ring_size = + device->physical_device->rad_info.attribute_ring_size_per_se * device->physical_device->rad_info.max_se; } /* Return early if we already match these needs. * Note that it's not possible for any of the needed values to be less * than what the queue already had, because we only ever increase the allocated size. */ - if (queue->initial_full_flush_preamble_cs && - queue->ring_info.scratch_size_per_wave == needs.scratch_size_per_wave && + if (queue->initial_full_flush_preamble_cs && queue->ring_info.scratch_size_per_wave == needs.scratch_size_per_wave && queue->ring_info.scratch_waves == needs.scratch_waves && queue->ring_info.compute_scratch_size_per_wave == needs.compute_scratch_size_per_wave && queue->ring_info.compute_scratch_waves == needs.compute_scratch_waves && queue->ring_info.esgs_ring_size == needs.esgs_ring_size && - queue->ring_info.gsvs_ring_size == needs.gsvs_ring_size && - queue->ring_info.tess_rings == needs.tess_rings && + queue->ring_info.gsvs_ring_size == needs.gsvs_ring_size && queue->ring_info.tess_rings == needs.tess_rings && queue->ring_info.task_rings == needs.task_rings && queue->ring_info.mesh_scratch_ring == needs.mesh_scratch_ring && - queue->ring_info.attr_ring_size == needs.attr_ring_size && - queue->ring_info.gds == needs.gds && queue->ring_info.gds_oa == needs.gds_oa && - queue->ring_info.sample_positions == needs.sample_positions) + queue->ring_info.attr_ring_size == needs.attr_ring_size && queue->ring_info.gds == needs.gds && + queue->ring_info.gds_oa == needs.gds_oa && queue->ring_info.sample_positions == needs.sample_positions) return VK_SUCCESS; return radv_update_preamble_cs(queue, device, &needs); @@ -1314,16 +1261,14 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue) VkResult r = VK_SUCCESS; struct radv_device *device = queue->device; struct radeon_winsys *ws = device->ws; - const enum amd_ip_type leader_ip = - radv_queue_family_to_ring(device->physical_device, queue->state.qf); + const enum amd_ip_type leader_ip = radv_queue_family_to_ring(device->physical_device, queue->state.qf); struct radeon_winsys_bo *gang_sem_bo = NULL; /* Gang semaphores BO. * DWORD 0: used in preambles, gang leader writes, gang members wait. * DWORD 1: used in postambles, gang leader waits, gang members write. 
*/ - r = ws->buffer_create(ws, 8, 4, RADEON_DOMAIN_VRAM, - RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM, + r = ws->buffer_create(ws, 8, 4, RADEON_DOMAIN_VRAM, RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM, RADV_BO_PRIORITY_SCRATCH, 0, &gang_sem_bo); if (r != VK_SUCCESS) return r; @@ -1357,14 +1302,12 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue) */ radv_cp_wait_mem(ace_pre_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, ace_wait_va, 1, 0xffffffff); radeon_emit(ace_pre_cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(ace_pre_cs, - S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); + radeon_emit(ace_pre_cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); radeon_emit(ace_pre_cs, ace_wait_va); radeon_emit(ace_pre_cs, ace_wait_va >> 32); radeon_emit(ace_pre_cs, 0); radeon_emit(leader_pre_cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(leader_pre_cs, - S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); + radeon_emit(leader_pre_cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); radeon_emit(leader_pre_cs, ace_wait_va); radeon_emit(leader_pre_cs, ace_wait_va >> 32); radeon_emit(leader_pre_cs, 1); @@ -1377,14 +1320,12 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue) */ radv_cp_wait_mem(leader_post_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, leader_wait_va, 1, 0xffffffff); radeon_emit(leader_post_cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(leader_post_cs, - S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); + radeon_emit(leader_post_cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); radeon_emit(leader_post_cs, leader_wait_va); radeon_emit(leader_post_cs, leader_wait_va >> 32); radeon_emit(leader_post_cs, 0); radeon_emit(ace_post_cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(ace_post_cs, - S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); + radeon_emit(ace_post_cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); radeon_emit(ace_post_cs, leader_wait_va); radeon_emit(ace_post_cs, leader_wait_va >> 32); radeon_emit(ace_post_cs, 1); @@ -1509,16 +1450,16 @@ radv_create_perf_counter_lock_cs(struct radv_device *device, unsigned pass, bool uint64_t set_va = va + (unlock ? 
0 : 8 * pass); radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | - COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_COUNT_SEL | + COPY_DATA_WR_CONFIRM); radeon_emit(cs, 0); /* immediate */ radeon_emit(cs, 0); radeon_emit(cs, unset_va); radeon_emit(cs, unset_va >> 32); radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | - COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_COUNT_SEL | + COPY_DATA_WR_CONFIRM); radeon_emit(cs, 1); /* immediate */ radeon_emit(cs, 0); radeon_emit(cs, set_va); @@ -1528,8 +1469,8 @@ radv_create_perf_counter_lock_cs(struct radv_device *device, unsigned pass, bool uint64_t mutex_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_LOCK_OFFSET; radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | - COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_COUNT_SEL | + COPY_DATA_WR_CONFIRM); radeon_emit(cs, 0); /* immediate */ radeon_emit(cs, 0); radeon_emit(cs, mutex_va); @@ -1614,8 +1555,7 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi } memcpy(new_waits, submission->waits, sizeof(struct vk_sync_wait) * submission->wait_count); - radv_get_shader_upload_sync_wait(queue->device, shader_upload_seq, - &new_waits[submission->wait_count]); + radv_get_shader_upload_sync_wait(queue->device, shader_upload_seq, &new_waits[submission->wait_count]); waits = new_waits; wait_count += 1; @@ -1667,14 +1607,11 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi initial_preambles[num_initial_preambles++] = queue->state.gang_wait_preamble_cs; initial_preambles[num_initial_preambles++] = queue->follower_state->gang_wait_preamble_cs; initial_preambles[num_initial_preambles++] = - need_wait ? queue->follower_state->initial_full_flush_preamble_cs - : queue->follower_state->initial_preamble_cs; + need_wait ? queue->follower_state->initial_full_flush_preamble_cs : queue->follower_state->initial_preamble_cs; continue_preambles[num_continue_preambles++] = queue->state.gang_wait_preamble_cs; - continue_preambles[num_continue_preambles++] = - queue->follower_state->gang_wait_preamble_cs; - continue_preambles[num_continue_preambles++] = - queue->follower_state->continue_preamble_cs; + continue_preambles[num_continue_preambles++] = queue->follower_state->gang_wait_preamble_cs; + continue_preambles[num_continue_preambles++] = queue->follower_state->continue_preamble_cs; postambles[num_postambles++] = queue->follower_state->gang_wait_postamble_cs; postambles[num_postambles++] = queue->state.gang_wait_postamble_cs; @@ -1709,17 +1646,14 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi /* Add CS from submitted command buffers. 
*/ for (unsigned c = 0; c < advance; ++c) { - struct radv_cmd_buffer *cmd_buffer = - (struct radv_cmd_buffer *)submission->command_buffers[j + c]; + struct radv_cmd_buffer *cmd_buffer = (struct radv_cmd_buffer *)submission->command_buffers[j + c]; assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - const bool can_chain_next = - !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT); + const bool can_chain_next = !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT); /* Follower needs to be first because the last CS must match the queue's IP type. */ if (radv_cmd_buffer_has_follower(cmd_buffer)) { queue->device->ws->cs_unchain(cmd_buffer->gang.cs); - if (!chainable_ace || - !queue->device->ws->cs_chain(chainable_ace, cmd_buffer->gang.cs, false)) + if (!chainable_ace || !queue->device->ws->cs_chain(chainable_ace, cmd_buffer->gang.cs, false)) cs_array[num_submitted_cs++] = cmd_buffer->gang.cs; chainable_ace = can_chain_next ? cmd_buffer->gang.cs : NULL; @@ -1727,8 +1661,7 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi } queue->device->ws->cs_unchain(cmd_buffer->cs); - if (!chainable || - !queue->device->ws->cs_chain(chainable, cmd_buffer->cs, queue->state.uses_shadow_regs)) + if (!chainable || !queue->device->ws->cs_chain(chainable, cmd_buffer->cs, queue->state.uses_shadow_regs)) cs_array[num_submitted_cs++] = cmd_buffer->cs; chainable = can_chain_next ? cmd_buffer->cs : NULL; @@ -1737,13 +1670,11 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi submit.cs_count = num_submitted_cs; submit.initial_preamble_count = submit_ace ? num_initial_preambles : num_1q_initial_preambles; - submit.continue_preamble_count = - submit_ace ? num_continue_preambles : num_1q_continue_preambles; + submit.continue_preamble_count = submit_ace ? num_continue_preambles : num_1q_continue_preambles; submit.postamble_count = submit_ace ? num_postambles : num_1q_postambles; result = queue->device->ws->cs_submit(ctx, &submit, j == 0 ? wait_count : 0, waits, - last_submit ? submission->signal_count : 0, - submission->signals); + last_submit ? submission->signal_count : 0, submission->signals); if (result != VK_SUCCESS) goto fail; @@ -1760,8 +1691,7 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi initial_preambles[1] = !use_ace ? NULL : queue->follower_state->initial_preamble_cs; } - queue->last_shader_upload_seq = - MAX2(queue->last_shader_upload_seq, shader_upload_seq); + queue->last_shader_upload_seq = MAX2(queue->last_shader_upload_seq, shader_upload_seq); fail: free(cs_array); @@ -1841,8 +1771,7 @@ radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx, if (result != VK_SUCCESS) return result; - queue->state.uses_shadow_regs = - device->uses_shadow_regs && queue->state.qf == RADV_QUEUE_GENERAL; + queue->state.uses_shadow_regs = device->uses_shadow_regs && queue->state.qf == RADV_QUEUE_GENERAL; if (queue->state.uses_shadow_regs) { result = radv_create_shadow_regs_preamble(device, &queue->state); if (result != VK_SUCCESS) diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h index e32e40b..7cc90ee 100644 --- a/src/amd/vulkan/radv_radeon_winsys.h +++ b/src/amd/vulkan/radv_radeon_winsys.h @@ -112,10 +112,10 @@ struct radeon_cmdbuf { /* These are uint64_t to tell the compiler that buf can't alias them. * If they're uint32_t the generated code needs to redundantly * store and reload them between buf writes. 
*/ - uint64_t cdw; /* Number of used dwords. */ - uint64_t max_dw; /* Maximum number of dwords. */ + uint64_t cdw; /* Number of used dwords. */ + uint64_t max_dw; /* Maximum number of dwords. */ uint64_t reserved_dw; /* Number of dwords reserved through radeon_check_space() */ - uint32_t *buf; /* The base pointer of the chunk. */ + uint32_t *buf; /* The base pointer of the chunk. */ }; #define RADEON_SURF_TYPE_MASK 0xFF @@ -129,11 +129,9 @@ struct radeon_cmdbuf { #define RADEON_SURF_MODE_MASK 0xFF #define RADEON_SURF_MODE_SHIFT 8 -#define RADEON_SURF_GET(v, field) \ - (((v) >> RADEON_SURF_##field##_SHIFT) & RADEON_SURF_##field##_MASK) +#define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_##field##_SHIFT) & RADEON_SURF_##field##_MASK) #define RADEON_SURF_SET(v, field) (((v)&RADEON_SURF_##field##_MASK) << RADEON_SURF_##field##_SHIFT) -#define RADEON_SURF_CLR(v, field) \ - ((v) & ~(RADEON_SURF_##field##_MASK << RADEON_SURF_##field##_SHIFT)) +#define RADEON_SURF_CLR(v, field) ((v) & ~(RADEON_SURF_##field##_MASK << RADEON_SURF_##field##_SHIFT)) enum radeon_bo_layout { RADEON_LAYOUT_LINEAR = 0, @@ -240,45 +238,39 @@ struct radeon_winsys { uint64_t (*query_value)(struct radeon_winsys *ws, enum radeon_value_id value); - bool (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset, unsigned num_registers, - uint32_t *out); + bool (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset, unsigned num_registers, uint32_t *out); const char *(*get_chip_name)(struct radeon_winsys *ws); - VkResult (*buffer_create)(struct radeon_winsys *ws, uint64_t size, unsigned alignment, - enum radeon_bo_domain domain, enum radeon_bo_flag flags, - unsigned priority, uint64_t address, struct radeon_winsys_bo **out_bo); + VkResult (*buffer_create)(struct radeon_winsys *ws, uint64_t size, unsigned alignment, enum radeon_bo_domain domain, + enum radeon_bo_flag flags, unsigned priority, uint64_t address, + struct radeon_winsys_bo **out_bo); void (*buffer_destroy)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo); void *(*buffer_map)(struct radeon_winsys_bo *bo); - VkResult (*buffer_from_ptr)(struct radeon_winsys *ws, void *pointer, uint64_t size, - unsigned priority, struct radeon_winsys_bo **out_bo); + VkResult (*buffer_from_ptr)(struct radeon_winsys *ws, void *pointer, uint64_t size, unsigned priority, + struct radeon_winsys_bo **out_bo); - VkResult (*buffer_from_fd)(struct radeon_winsys *ws, int fd, unsigned priority, - struct radeon_winsys_bo **out_bo, uint64_t *alloc_size); + VkResult (*buffer_from_fd)(struct radeon_winsys *ws, int fd, unsigned priority, struct radeon_winsys_bo **out_bo, + uint64_t *alloc_size); bool (*buffer_get_fd)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo, int *fd); - bool (*buffer_get_flags_from_fd)(struct radeon_winsys *ws, int fd, - enum radeon_bo_domain *domains, enum radeon_bo_flag *flags); + bool (*buffer_get_flags_from_fd)(struct radeon_winsys *ws, int fd, enum radeon_bo_domain *domains, + enum radeon_bo_flag *flags); void (*buffer_unmap)(struct radeon_winsys_bo *bo); - void (*buffer_set_metadata)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo, - struct radeon_bo_metadata *md); - void (*buffer_get_metadata)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo, - struct radeon_bo_metadata *md); + void (*buffer_set_metadata)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo, struct radeon_bo_metadata *md); + void (*buffer_get_metadata)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo, struct radeon_bo_metadata *md); - VkResult 
(*buffer_virtual_bind)(struct radeon_winsys *ws, struct radeon_winsys_bo *parent, - uint64_t offset, uint64_t size, struct radeon_winsys_bo *bo, - uint64_t bo_offset); + VkResult (*buffer_virtual_bind)(struct radeon_winsys *ws, struct radeon_winsys_bo *parent, uint64_t offset, + uint64_t size, struct radeon_winsys_bo *bo, uint64_t bo_offset); - VkResult (*buffer_make_resident)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo, - bool resident); + VkResult (*buffer_make_resident)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo, bool resident); - VkResult (*ctx_create)(struct radeon_winsys *ws, enum radeon_ctx_priority priority, - struct radeon_winsys_ctx **ctx); + VkResult (*ctx_create)(struct radeon_winsys *ws, enum radeon_ctx_priority priority, struct radeon_winsys_ctx **ctx); void (*ctx_destroy)(struct radeon_winsys_ctx *ctx); bool (*ctx_wait_idle)(struct radeon_winsys_ctx *ctx, enum amd_ip_type amd_ip_type, int ring_index); @@ -289,8 +281,7 @@ struct radeon_winsys { enum radeon_bo_domain (*cs_domain)(const struct radeon_winsys *ws); - struct radeon_cmdbuf *(*cs_create)(struct radeon_winsys *ws, enum amd_ip_type amd_ip_type, - bool is_secondary); + struct radeon_cmdbuf *(*cs_create)(struct radeon_winsys *ws, enum amd_ip_type amd_ip_type, bool is_secondary); void (*cs_destroy)(struct radeon_cmdbuf *cs); @@ -304,15 +295,13 @@ struct radeon_winsys { void (*cs_grow)(struct radeon_cmdbuf *cs, size_t min_size); - VkResult (*cs_submit)(struct radeon_winsys_ctx *ctx, - const struct radv_winsys_submit_info *submit, uint32_t wait_count, - const struct vk_sync_wait *waits, uint32_t signal_count, + VkResult (*cs_submit)(struct radeon_winsys_ctx *ctx, const struct radv_winsys_submit_info *submit, + uint32_t wait_count, const struct vk_sync_wait *waits, uint32_t signal_count, const struct vk_sync_signal *signals); void (*cs_add_buffer)(struct radeon_cmdbuf *cs, struct radeon_winsys_bo *bo); - void (*cs_execute_secondary)(struct radeon_cmdbuf *parent, struct radeon_cmdbuf *child, - bool allow_ib2); + void (*cs_execute_secondary)(struct radeon_cmdbuf *parent, struct radeon_cmdbuf *child, bool allow_ib2); void (*cs_dump)(struct radeon_cmdbuf *cs, FILE *file, const int *trace_ids, int trace_id_count); @@ -320,8 +309,7 @@ struct radeon_winsys { void (*dump_bo_log)(struct radeon_winsys *ws, FILE *file); - int (*surface_init)(struct radeon_winsys *ws, const struct ac_surf_info *surf_info, - struct radeon_surf *surf); + int (*surface_init)(struct radeon_winsys *ws, const struct ac_surf_info *surf_info, struct radeon_surf *surf); int (*get_fd)(struct radeon_winsys *ws); diff --git a/src/amd/vulkan/radv_rmv.c b/src/amd/vulkan/radv_rmv.c index 00c061b..1564b8d 100644 --- a/src/amd/vulkan/radv_rmv.c +++ b/src/amd/vulkan/radv_rmv.c @@ -38,8 +38,7 @@ static FILE * open_event_file(const char *event_name, const char *event_filename, const char *mode) { char filename[2048]; - snprintf(filename, sizeof(filename), RADV_FTRACE_INSTANCE_PATH "/events/amdgpu/%s/%s", - event_name, event_filename); + snprintf(filename, sizeof(filename), RADV_FTRACE_INSTANCE_PATH "/events/amdgpu/%s/%s", event_name, event_filename); return fopen(filename, mode); } @@ -81,8 +80,7 @@ open_trace_pipe(uint32_t cpu_index, int *dst_fd) *dst_fd = -1; #else char filename[2048]; - snprintf(filename, sizeof(filename), RADV_FTRACE_INSTANCE_PATH "/per_cpu/cpu%d/trace_pipe_raw", - cpu_index); + snprintf(filename, sizeof(filename), RADV_FTRACE_INSTANCE_PATH "/per_cpu/cpu%d/trace_pipe_raw", cpu_index); /* I/O to the pipe needs to be non-blocking, 
otherwise reading all available * data would block indefinitely by waiting for more data to be written to the pipe */ *dst_fd = open(filename, O_RDONLY | O_NONBLOCK); @@ -98,11 +96,7 @@ struct trace_page_header { int32_t commit; }; -enum trace_event_type { - TRACE_EVENT_TYPE_PADDING = 29, - TRACE_EVENT_TYPE_EXTENDED_DELTA, - TRACE_EVENT_TYPE_TIMESTAMP -}; +enum trace_event_type { TRACE_EVENT_TYPE_PADDING = 29, TRACE_EVENT_TYPE_EXTENDED_DELTA, TRACE_EVENT_TYPE_TIMESTAMP }; struct trace_event_header { uint32_t type_len : 5; @@ -146,8 +140,7 @@ struct trace_event_address_array { static void emit_page_table_update_event(struct vk_memory_trace_data *data, bool is_apu, uint64_t timestamp, - struct trace_event_amdgpu_vm_update_ptes *event, uint64_t *addrs, - unsigned int pte_index) + struct trace_event_amdgpu_vm_update_ptes *event, uint64_t *addrs, unsigned int pte_index) { struct vk_rmv_token token; @@ -164,16 +157,13 @@ emit_page_table_update_event(struct vk_memory_trace_data *data, bool is_apu, uin token.timestamp = timestamp; token.data.page_table_update.type = VK_RMV_PAGE_TABLE_UPDATE_TYPE_UPDATE; token.data.page_table_update.page_size = event->incr; - token.data.page_table_update.page_count = - (end_addr - start_addr) * MIN_GPU_PAGE_SIZE / event->incr; + token.data.page_table_update.page_count = (end_addr - start_addr) * MIN_GPU_PAGE_SIZE / event->incr; token.data.page_table_update.pid = event->common.pid; - token.data.page_table_update.virtual_address = - event->start * MIN_GPU_PAGE_SIZE + pte_index * event->incr; + token.data.page_table_update.virtual_address = event->start * MIN_GPU_PAGE_SIZE + pte_index * event->incr; /* RMV expects mappings to system memory to have a physical address of 0. * Even with traces generated by AMDGPU-PRO, on APUs without dedicated VRAM everything seems to * be marked as "committed to system memory". */ - token.data.page_table_update.physical_address = - event->flags & AMDGPU_PTE_SYSTEM || is_apu ? 0 : addrs[pte_index]; + token.data.page_table_update.physical_address = event->flags & AMDGPU_PTE_SYSTEM || is_apu ? 0 : addrs[pte_index]; token.data.page_table_update.is_unmap = !(event->flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)); util_dynarray_append(&data->tokens, struct vk_rmv_token, token); @@ -190,9 +180,8 @@ evaluate_trace_event(struct radv_device *device, uint64_t timestamp, struct util struct trace_event_address_array *array = (struct trace_event_address_array *)(event + 1); for (uint32_t i = 0; i < event->num_ptes; ++i) - emit_page_table_update_event(&device->vk.memory_trace_data, - !device->physical_device->rad_info.has_dedicated_vram, timestamp, - event, (uint64_t *)array->data, i); + emit_page_table_update_event(&device->vk.memory_trace_data, !device->physical_device->rad_info.has_dedicated_vram, + timestamp, event, (uint64_t *)array->data, i); } static void @@ -384,8 +373,8 @@ fill_memory_info(const struct radeon_info *info, struct vk_rmv_memory_info *out_ switch (index) { case VK_RMV_MEMORY_LOCATION_DEVICE: out_info->physical_base_address = 0; - out_info->size = info->all_vram_visible ? (uint64_t)info->vram_size_kb * 1024ULL - : (uint64_t)info->vram_vis_size_kb * 1024ULL; + out_info->size = + info->all_vram_visible ? 
(uint64_t)info->vram_size_kb * 1024ULL : (uint64_t)info->vram_vis_size_kb * 1024ULL; break; case VK_RMV_MEMORY_LOCATION_DEVICE_INVISIBLE: out_info->physical_base_address = (uint64_t)info->vram_vis_size_kb * 1024ULL; @@ -471,8 +460,8 @@ radv_memory_trace_finish(struct radv_device *device) /* The token lock must be held when entering _locked functions */ static void -log_resource_bind_locked(struct radv_device *device, uint64_t resource, struct radeon_winsys_bo *bo, - uint64_t offset, uint64_t size) +log_resource_bind_locked(struct radv_device *device, uint64_t resource, struct radeon_winsys_bo *bo, uint64_t offset, + uint64_t size) { struct vk_rmv_resource_bind_token token = {0}; token.address = bo->va + offset; @@ -514,8 +503,7 @@ radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool i } void -radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo, uint32_t size, - bool is_internal) +radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo, uint32_t size, bool is_internal) { if (!device->vk.memory_trace_data.is_enabled) return; @@ -523,8 +511,7 @@ radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo struct vk_rmv_virtual_allocate_token token = {0}; token.address = bo->va; /* If all VRAM is visible, no bo will be in invisible memory. */ - token.is_in_invisible_vram = - bo->vram_no_cpu_access && !device->physical_device->rad_info.all_vram_visible; + token.is_in_invisible_vram = bo->vram_no_cpu_access && !device->physical_device->rad_info.all_vram_visible; token.preferred_domains = (enum vk_rmv_kernel_memory_domain)bo->initial_domain; token.is_driver_internal = is_internal; token.page_count = DIV_ROUND_UP(size, 4096); @@ -563,8 +550,8 @@ radv_rmv_log_buffer_bind(struct radv_device *device, VkBuffer _buffer) } void -radv_rmv_log_image_create(struct radv_device *device, const VkImageCreateInfo *create_info, - bool is_internal, VkImage _image) +radv_rmv_log_image_create(struct radv_device *device, const VkImageCreateInfo *create_info, bool is_internal, + VkImage _image) { if (!device->vk.memory_trace_data.is_enabled) return; @@ -608,8 +595,7 @@ radv_rmv_log_image_bind(struct radv_device *device, VkImage _image) RADV_FROM_HANDLE(radv_image, image, _image); simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); - log_resource_bind_locked(device, (uint64_t)_image, image->bindings[0].bo, - image->bindings[0].offset, image->size); + log_resource_bind_locked(device, (uint64_t)_image, image->bindings[0].bo, image->bindings[0].offset, image->size); simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); } @@ -635,16 +621,14 @@ radv_rmv_log_query_pool_create(struct radv_device *device, VkQueryPool _pool, bo create_token.query_pool.type = pool->type; create_token.query_pool.has_cpu_access = true; - vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, - &create_token); + vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token); log_resource_bind_locked(device, (uint64_t)_pool, pool->bo, 0, pool->size); simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); } void -radv_rmv_log_command_buffer_bo_create(struct radv_device *device, struct radeon_winsys_bo *bo, - uint32_t executable_size, uint32_t data_size, - uint32_t scratch_size) +radv_rmv_log_command_buffer_bo_create(struct radv_device *device, struct radeon_winsys_bo *bo, uint32_t executable_size, + uint32_t data_size, uint32_t scratch_size) { if 
(!device->vk.memory_trace_data.is_enabled) return; @@ -660,11 +644,9 @@ radv_rmv_log_command_buffer_bo_create(struct radv_device *device, struct radeon_ simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); struct vk_rmv_resource_create_token create_token = {0}; create_token.is_driver_internal = true; - create_token.resource_id = - vk_rmv_get_resource_id_locked(&device->vk, upload_resource_identifier); + create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, upload_resource_identifier); create_token.type = VK_RMV_RESOURCE_TYPE_COMMAND_ALLOCATOR; - create_token.command_buffer.preferred_domain = - (enum vk_rmv_kernel_memory_domain)device->ws->cs_domain(device->ws); + create_token.command_buffer.preferred_domain = (enum vk_rmv_kernel_memory_domain)device->ws->cs_domain(device->ws); create_token.command_buffer.executable_size = executable_size; create_token.command_buffer.app_available_executable_size = executable_size; create_token.command_buffer.embedded_data_size = data_size; @@ -672,8 +654,7 @@ radv_rmv_log_command_buffer_bo_create(struct radv_device *device, struct radeon_ create_token.command_buffer.scratch_size = scratch_size; create_token.command_buffer.app_available_scratch_size = scratch_size; - vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, - &create_token); + vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token); log_resource_bind_locked(device, upload_resource_identifier, bo, 0, size); simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); vk_rmv_log_cpu_map(&device->vk, bo->va, false); @@ -689,8 +670,7 @@ radv_rmv_log_command_buffer_bo_destroy(struct radv_device *device, struct radeon struct vk_rmv_resource_destroy_token destroy_token = {0}; destroy_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)(uintptr_t)bo); - vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY, - &destroy_token); + vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY, &destroy_token); vk_rmv_destroy_resource_id_locked(&device->vk, (uint64_t)(uintptr_t)bo); simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); radv_rmv_log_bo_destroy(device, bo); @@ -722,8 +702,7 @@ radv_rmv_log_border_color_palette_create(struct radv_device *device, struct rade bind_token.resource_id = resource_id; bind_token.size = RADV_BORDER_COLOR_BUFFER_SIZE; - vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, - &create_token); + vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token); vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_BIND, &bind_token); simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); vk_rmv_log_cpu_map(&device->vk, bo->va, false); @@ -746,8 +725,7 @@ radv_rmv_log_border_color_palette_destroy(struct radv_device *device, struct rad } void -radv_rmv_log_sparse_add_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, - uint64_t offset) +radv_rmv_log_sparse_add_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, uint64_t offset) { if (!device->vk.memory_trace_data.is_enabled) return; @@ -763,8 +741,7 @@ radv_rmv_log_sparse_add_residency(struct radv_device *device, struct radeon_wins } void -radv_rmv_log_sparse_remove_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, - uint64_t offset) +radv_rmv_log_sparse_remove_residency(struct radv_device *device, struct 
radeon_winsys_bo *src_bo, uint64_t offset) { if (!device->vk.memory_trace_data.is_enabled) return; @@ -780,8 +757,7 @@ radv_rmv_log_sparse_remove_residency(struct radv_device *device, struct radeon_w } void -radv_rmv_log_descriptor_pool_create(struct radv_device *device, - const VkDescriptorPoolCreateInfo *create_info, +radv_rmv_log_descriptor_pool_create(struct radv_device *device, const VkDescriptorPoolCreateInfo *create_info, VkDescriptorPool _pool, bool is_internal) { if (!device->vk.memory_trace_data.is_enabled) @@ -802,16 +778,14 @@ radv_rmv_log_descriptor_pool_create(struct radv_device *device, create_token.descriptor_pool.max_sets = create_info->maxSets; create_token.descriptor_pool.pool_size_count = create_info->poolSizeCount; /* Using vk_rmv_token_pool_alloc frees the allocation automatically when the trace is done. */ - create_token.descriptor_pool.pool_sizes = - malloc(create_info->poolSizeCount * sizeof(VkDescriptorPoolSize)); + create_token.descriptor_pool.pool_sizes = malloc(create_info->poolSizeCount * sizeof(VkDescriptorPoolSize)); if (!create_token.descriptor_pool.pool_sizes) return; memcpy(create_token.descriptor_pool.pool_sizes, create_info->pPoolSizes, create_info->poolSizeCount * sizeof(VkDescriptorPoolSize)); - vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, - &create_token); + vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token); simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); if (pool->bo) { @@ -822,8 +796,7 @@ radv_rmv_log_descriptor_pool_create(struct radv_device *device, bind_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pool); bind_token.size = pool->size; - vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_BIND, - &bind_token); + vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_BIND, &bind_token); simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); } } @@ -848,16 +821,14 @@ radv_rmv_log_graphics_pipeline_create(struct radv_device *device, VkPipelineCrea create_token.pipeline.is_ngg = graphics_pipeline->is_ngg; create_token.pipeline.shader_stages = graphics_pipeline->active_stages; - vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, - &create_token); + vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token); for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) { struct radv_shader *shader = pipeline->shaders[s]; if (!shader) continue; - log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset, - shader->alloc->size); + log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset, shader->alloc->size); } simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); } @@ -871,9 +842,8 @@ radv_rmv_log_compute_pipeline_create(struct radv_device *device, VkPipelineCreat VkPipeline _pipeline = radv_pipeline_to_handle(pipeline); - VkShaderStageFlagBits active_stages = pipeline->type == RADV_PIPELINE_COMPUTE - ? VK_SHADER_STAGE_COMPUTE_BIT - : VK_SHADER_STAGE_RAYGEN_BIT_KHR; + VkShaderStageFlagBits active_stages = + pipeline->type == RADV_PIPELINE_COMPUTE ? 
VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_RAYGEN_BIT_KHR; simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); struct vk_rmv_resource_create_token create_token = {0}; @@ -885,17 +855,14 @@ radv_rmv_log_compute_pipeline_create(struct radv_device *device, VkPipelineCreat create_token.pipeline.is_ngg = false; create_token.pipeline.shader_stages = active_stages; - vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, - &create_token); + vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token); struct radv_shader *shader = pipeline->shaders[vk_to_mesa_shader_stage(active_stages)]; - log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset, - shader->alloc->size); + log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset, shader->alloc->size); simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); } void -radv_rmv_log_event_create(struct radv_device *device, VkEvent _event, VkEventCreateFlags flags, - bool is_internal) +radv_rmv_log_event_create(struct radv_device *device, VkEvent _event, VkEventCreateFlags flags, bool is_internal) { if (!device->vk.memory_trace_data.is_enabled) return; @@ -910,8 +877,7 @@ radv_rmv_log_event_create(struct radv_device *device, VkEvent _event, VkEventCre create_token.event.flags = flags; create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_event); - vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, - &create_token); + vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token); log_resource_bind_locked(device, (uint64_t)_event, event->bo, 0, 8); simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); diff --git a/src/amd/vulkan/radv_rra.c b/src/amd/vulkan/radv_rra.c index 1c51c08..769a784 100644 --- a/src/amd/vulkan/radv_rra.c +++ b/src/amd/vulkan/radv_rra.c @@ -67,8 +67,7 @@ struct rra_file_chunk_description { uint64_t unused; }; -static_assert(sizeof(struct rra_file_chunk_description) == 64, - "rra_file_chunk_description does not match RRA spec"); +static_assert(sizeof(struct rra_file_chunk_description) == 64, "rra_file_chunk_description does not match RRA spec"); static uint64_t node_to_addr(uint64_t node) @@ -91,8 +90,8 @@ rra_dump_header(FILE *output, uint64_t chunk_descriptions_offset, uint64_t chunk } static void -rra_dump_chunk_description(uint64_t offset, uint64_t header_size, uint64_t data_size, - const char *name, enum rra_chunk_type type, FILE *output) +rra_dump_chunk_description(uint64_t offset, uint64_t header_size, uint64_t data_size, const char *name, + enum rra_chunk_type type, FILE *output) { struct rra_file_chunk_description chunk = { .type = type, @@ -178,8 +177,7 @@ rra_dump_asic_info(const struct radeon_info *rad_info, FILE *output) .rev_id = rad_info->pci_rev_id, }; - strncpy(asic_info.device_name, - rad_info->marketing_name ? rad_info->marketing_name : rad_info->name, + strncpy(asic_info.device_name, rad_info->marketing_name ? 
rad_info->marketing_name : rad_info->name, RRA_FILE_DEVICE_NAME_MAX_SIZE - 1); fwrite(&asic_info, sizeof(struct rra_asic_info), 1, output); @@ -246,8 +244,7 @@ struct rra_accel_struct_header { #define RRA_ROOT_NODE_OFFSET align(sizeof(struct rra_accel_struct_header), 64) -static_assert(sizeof(struct rra_accel_struct_header) == 120, - "rra_accel_struct_header does not match RRA spec"); +static_assert(sizeof(struct rra_accel_struct_header) == 120, "rra_accel_struct_header does not match RRA spec"); struct rra_accel_struct_metadata { uint64_t virtual_address; @@ -255,8 +252,7 @@ struct rra_accel_struct_metadata { char unused[116]; }; -static_assert(sizeof(struct rra_accel_struct_metadata) == 128, - "rra_accel_struct_metadata does not match RRA spec"); +static_assert(sizeof(struct rra_accel_struct_metadata) == 128, "rra_accel_struct_metadata does not match RRA spec"); struct rra_geometry_info { uint32_t primitive_count : 29; @@ -268,9 +264,9 @@ struct rra_geometry_info { static_assert(sizeof(struct rra_geometry_info) == 12, "rra_geometry_info does not match RRA spec"); static struct rra_accel_struct_header -rra_fill_accel_struct_header_common(struct radv_accel_struct_header *header, - size_t parent_id_table_size, size_t leaf_node_data_size, - size_t internal_node_data_size, uint64_t primitive_count) +rra_fill_accel_struct_header_common(struct radv_accel_struct_header *header, size_t parent_id_table_size, + size_t leaf_node_data_size, size_t internal_node_data_size, + uint64_t primitive_count) { struct rra_accel_struct_header result = { .post_build_info = @@ -288,8 +284,8 @@ rra_fill_accel_struct_header_common(struct radv_accel_struct_header *header, }; result.metadata_size = sizeof(struct rra_accel_struct_metadata) + parent_id_table_size; - result.file_size = result.metadata_size + sizeof(struct rra_accel_struct_header) + - internal_node_data_size + leaf_node_data_size; + result.file_size = + result.metadata_size + sizeof(struct rra_accel_struct_header) + internal_node_data_size + leaf_node_data_size; result.internal_nodes_offset = sizeof(struct rra_accel_struct_metadata); result.leaf_nodes_offset = result.internal_nodes_offset + internal_node_data_size; @@ -331,8 +327,7 @@ struct rra_instance_node { float otw_matrix[12]; }; -static_assert(sizeof(struct rra_instance_node) == 128, - "rra_instance_node does not match RRA spec!"); +static_assert(sizeof(struct rra_instance_node) == 128, "rra_instance_node does not match RRA spec!"); /* * Format RRA uses for aabb nodes @@ -361,9 +356,8 @@ struct rra_triangle_node { static_assert(sizeof(struct rra_triangle_node) == 64, "rra_triangle_node does not match RRA spec!"); static void -rra_dump_tlas_header(struct radv_accel_struct_header *header, size_t parent_id_table_size, - size_t leaf_node_data_size, size_t internal_node_data_size, - uint64_t primitive_count, FILE *output) +rra_dump_tlas_header(struct radv_accel_struct_header *header, size_t parent_id_table_size, size_t leaf_node_data_size, + size_t internal_node_data_size, uint64_t primitive_count, FILE *output) { struct rra_accel_struct_header file_header = rra_fill_accel_struct_header_common( header, parent_id_table_size, leaf_node_data_size, internal_node_data_size, primitive_count); @@ -375,15 +369,13 @@ rra_dump_tlas_header(struct radv_accel_struct_header *header, size_t parent_id_t static void rra_dump_blas_header(struct radv_accel_struct_header *header, size_t parent_id_table_size, - struct radv_accel_struct_geometry_info *geometry_infos, - size_t leaf_node_data_size, size_t 
internal_node_data_size, - uint64_t primitive_count, FILE *output) + struct radv_accel_struct_geometry_info *geometry_infos, size_t leaf_node_data_size, + size_t internal_node_data_size, uint64_t primitive_count, FILE *output) { struct rra_accel_struct_header file_header = rra_fill_accel_struct_header_common( header, parent_id_table_size, leaf_node_data_size, internal_node_data_size, primitive_count); file_header.post_build_info.bvh_type = RRA_BVH_TYPE_BLAS; - file_header.geometry_type = - header->geometry_count ? geometry_infos->type : VK_GEOMETRY_TYPE_TRIANGLES_KHR; + file_header.geometry_type = header->geometry_count ? geometry_infos->type : VK_GEOMETRY_TYPE_TRIANGLES_KHR; fwrite(&file_header, sizeof(struct rra_accel_struct_header), 1, output); } @@ -400,8 +392,7 @@ struct rra_validation_context { char location[31]; }; -static void PRINTFLIKE(2, 3) -rra_validation_fail(struct rra_validation_context *ctx, const char *message, ...) +static void PRINTFLIKE(2, 3) rra_validation_fail(struct rra_validation_context *ctx, const char *message, ...) { if (!ctx->failed) { fprintf(stderr, "radv: rra: Validation failed at %s:\n", ctx->location); @@ -419,15 +410,13 @@ rra_validation_fail(struct rra_validation_context *ctx, const char *message, ... } static bool -rra_validate_header(struct radv_rra_accel_struct_data *accel_struct, - const struct radv_accel_struct_header *header) +rra_validate_header(struct radv_rra_accel_struct_data *accel_struct, const struct radv_accel_struct_header *header) { struct rra_validation_context ctx = { .location = "header", }; - if (accel_struct->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR && - header->instance_count > 0) + if (accel_struct->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR && header->instance_count > 0) rra_validation_fail(&ctx, "BLAS contains instances"); if (header->bvh_offset >= accel_struct->size) @@ -457,8 +446,8 @@ static const char *node_type_names[8] = { }; static bool -rra_validate_node(struct hash_table_u64 *accel_struct_vas, uint8_t *data, void *node, - uint32_t geometry_count, uint32_t size, bool is_bottom_level) +rra_validate_node(struct hash_table_u64 *accel_struct_vas, uint8_t *data, void *node, uint32_t geometry_count, + uint32_t size, bool is_bottom_level) { struct rra_validation_context ctx = {0}; @@ -476,8 +465,7 @@ rra_validate_node(struct hash_table_u64 *accel_struct_vas, uint8_t *data, void * if (!is_internal_node(type) && is_bottom_level == (type == radv_bvh_node_instance)) rra_validation_fail(&ctx, - is_bottom_level ? "%s node in BLAS (child index %u)" - : "%s node in TLAS (child index %u)", + is_bottom_level ? 
"%s node in BLAS (child index %u)" : "%s node in TLAS (child index %u)", node_type_names[type], i); if (offset > size) { @@ -486,12 +474,10 @@ rra_validate_node(struct hash_table_u64 *accel_struct_vas, uint8_t *data, void * } struct rra_validation_context child_ctx = {0}; - snprintf(child_ctx.location, sizeof(child_ctx.location), "%s node (offset=%u)", - node_type_names[type], offset); + snprintf(child_ctx.location, sizeof(child_ctx.location), "%s node (offset=%u)", node_type_names[type], offset); if (is_internal_node(type)) { - ctx.failed |= rra_validate_node(accel_struct_vas, data, data + offset, geometry_count, - size, is_bottom_level); + ctx.failed |= rra_validate_node(accel_struct_vas, data, data + offset, geometry_count, size, is_bottom_level); } else if (type == radv_bvh_node_instance) { struct radv_bvh_instance_node *src = (struct radv_bvh_instance_node *)(data + offset); uint64_t blas_va = node_to_addr(src->bvh_ptr) - src->bvh_offset; @@ -525,8 +511,7 @@ struct rra_transcoding_context { }; static void -rra_transcode_triangle_node(struct rra_transcoding_context *ctx, - const struct radv_bvh_triangle_node *src) +rra_transcode_triangle_node(struct rra_transcoding_context *ctx, const struct radv_bvh_triangle_node *src) { struct rra_triangle_node *dst = (struct rra_triangle_node *)(ctx->dst + ctx->dst_leaf_offset); ctx->dst_leaf_offset += sizeof(struct rra_triangle_node); @@ -541,8 +526,7 @@ rra_transcode_triangle_node(struct rra_transcoding_context *ctx, } static void -rra_transcode_aabb_node(struct rra_transcoding_context *ctx, const struct radv_bvh_aabb_node *src, - radv_aabb bounds) +rra_transcode_aabb_node(struct rra_transcoding_context *ctx, const struct radv_bvh_aabb_node *src, radv_aabb bounds) { struct rra_aabb_node *dst = (struct rra_aabb_node *)(ctx->dst + ctx->dst_leaf_offset); ctx->dst_leaf_offset += sizeof(struct rra_aabb_node); @@ -560,8 +544,7 @@ rra_transcode_aabb_node(struct rra_transcoding_context *ctx, const struct radv_b } static void -rra_transcode_instance_node(struct rra_transcoding_context *ctx, - const struct radv_bvh_instance_node *src) +rra_transcode_instance_node(struct rra_transcoding_context *ctx, const struct radv_bvh_instance_node *src) { uint64_t blas_va = node_to_addr(src->bvh_ptr) - src->bvh_offset; @@ -580,8 +563,8 @@ rra_transcode_instance_node(struct rra_transcoding_context *ctx, memcpy(dst->otw_matrix, src->otw_matrix.values, sizeof(dst->otw_matrix)); } -static uint32_t rra_transcode_node(struct rra_transcoding_context *ctx, uint32_t parent_id, - uint32_t src_id, radv_aabb bounds); +static uint32_t rra_transcode_node(struct rra_transcoding_context *ctx, uint32_t parent_id, uint32_t src_id, + radv_aabb bounds); static void rra_transcode_box16_node(struct rra_transcoding_context *ctx, const struct radv_bvh_box16_node *src) @@ -613,8 +596,7 @@ rra_transcode_box16_node(struct rra_transcoding_context *ctx, const struct radv_ }, }; - dst->children[i] = - rra_transcode_node(ctx, radv_bvh_node_box16 | (dst_offset >> 3), src->children[i], bounds); + dst->children[i] = rra_transcode_node(ctx, radv_bvh_node_box16 | (dst_offset >> 3), src->children[i], bounds); } } @@ -633,8 +615,8 @@ rra_transcode_box32_node(struct rra_transcoding_context *ctx, const struct radv_ continue; } - dst->children[i] = rra_transcode_node(ctx, radv_bvh_node_box32 | (dst_offset >> 3), - src->children[i], src->coords[i]); + dst->children[i] = + rra_transcode_node(ctx, radv_bvh_node_box32 | (dst_offset >> 3), src->children[i], src->coords[i]); } } @@ -655,8 +637,7 @@ 
get_geometry_id(const void *node, uint32_t node_type) } static uint32_t -rra_transcode_node(struct rra_transcoding_context *ctx, uint32_t parent_id, uint32_t src_id, - radv_aabb bounds) +rra_transcode_node(struct rra_transcoding_context *ctx, uint32_t parent_id, uint32_t src_id, radv_aabb bounds) { uint32_t node_type = src_id & 7; uint32_t src_offset = (src_id & (~7u)) << 3; @@ -681,8 +662,7 @@ rra_transcode_node(struct rra_transcoding_context *ctx, uint32_t parent_id, uint rra_transcode_instance_node(ctx, src_child_node); } - uint32_t parent_id_index = - rra_parent_table_index_from_offset(dst_offset, ctx->parent_id_table_size); + uint32_t parent_id_index = rra_parent_table_index_from_offset(dst_offset, ctx->parent_id_table_size); ctx->parent_id_table[parent_id_index] = parent_id; uint32_t dst_id = node_type | (dst_offset >> 3); @@ -737,8 +717,7 @@ rra_gather_bvh_info(const uint8_t *bvh, uint32_t node_id, struct rra_bvh_info *d static VkResult rra_dump_acceleration_structure(struct radv_rra_accel_struct_data *accel_struct, uint8_t *data, - struct hash_table_u64 *accel_struct_vas, bool should_validate, - FILE *output) + struct hash_table_u64 *accel_struct_vas, bool should_validate, FILE *output) { struct radv_accel_struct_header *header = (struct radv_accel_struct_header *)data; @@ -753,9 +732,8 @@ rra_dump_acceleration_structure(struct radv_rra_accel_struct_data *accel_struct, if (rra_validate_header(accel_struct, header)) { return VK_ERROR_VALIDATION_FAILED_EXT; } - if (rra_validate_node(accel_struct_vas, data + header->bvh_offset, - data + header->bvh_offset + src_root_offset, header->geometry_count, - accel_struct->size, !is_tlas)) { + if (rra_validate_node(accel_struct_vas, data + header->bvh_offset, data + header->bvh_offset + src_root_offset, + header->geometry_count, accel_struct->size, !is_tlas)) { return VK_ERROR_VALIDATION_FAILED_EXT; } } @@ -811,8 +789,7 @@ rra_dump_acceleration_structure(struct radv_rra_accel_struct_data *accel_struct, result = VK_ERROR_OUT_OF_HOST_MEMORY; goto exit; } - dst_structure_data = - calloc(RRA_ROOT_NODE_OFFSET + bvh_info.internal_nodes_size + bvh_info.leaf_nodes_size, 1); + dst_structure_data = calloc(RRA_ROOT_NODE_OFFSET + bvh_info.internal_nodes_size + bvh_info.leaf_nodes_size, 1); if (!dst_structure_data) { result = VK_ERROR_OUT_OF_HOST_MEMORY; goto exit; @@ -858,8 +835,7 @@ rra_dump_acceleration_structure(struct radv_rra_accel_struct_data *accel_struct, struct rra_accel_struct_metadata rra_metadata = { .virtual_address = va, - .byte_size = bvh_info.leaf_nodes_size + bvh_info.internal_nodes_size + - sizeof(struct rra_accel_struct_header), + .byte_size = bvh_info.leaf_nodes_size + bvh_info.internal_nodes_size + sizeof(struct rra_accel_struct_header), }; fwrite(&chunk_header, sizeof(struct rra_accel_struct_chunk_header), 1, output); @@ -869,15 +845,15 @@ rra_dump_acceleration_structure(struct radv_rra_accel_struct_data *accel_struct, fwrite(node_parent_table, 1, node_parent_table_size, output); if (is_tlas) - rra_dump_tlas_header(header, node_parent_table_size, bvh_info.leaf_nodes_size, - bvh_info.internal_nodes_size, primitive_count, output); + rra_dump_tlas_header(header, node_parent_table_size, bvh_info.leaf_nodes_size, bvh_info.internal_nodes_size, + primitive_count, output); else rra_dump_blas_header(header, node_parent_table_size, geometry_infos, bvh_info.leaf_nodes_size, bvh_info.internal_nodes_size, primitive_count, output); /* Write acceleration structure data */ - fwrite(dst_structure_data + RRA_ROOT_NODE_OFFSET, 1, - 
bvh_info.internal_nodes_size + bvh_info.leaf_nodes_size, output); + fwrite(dst_structure_data + RRA_ROOT_NODE_OFFSET, 1, bvh_info.internal_nodes_size + bvh_info.leaf_nodes_size, + output); if (!is_tlas) fwrite(rra_geometry_infos, sizeof(struct rra_geometry_info), header->geometry_count, output); @@ -921,16 +897,14 @@ radv_rra_trace_init(struct radv_device *device) device->rra_trace.elapsed_frames = 0; device->rra_trace.trigger_file = radv_rra_trace_trigger_file(); device->rra_trace.validate_as = debug_get_bool_option("RADV_RRA_TRACE_VALIDATE", false); - device->rra_trace.copy_after_build = - debug_get_bool_option("RADV_RRA_TRACE_COPY_AFTER_BUILD", false); + device->rra_trace.copy_after_build = debug_get_bool_option("RADV_RRA_TRACE_COPY_AFTER_BUILD", false); device->rra_trace.accel_structs = _mesa_pointer_hash_table_create(NULL); device->rra_trace.accel_struct_vas = _mesa_hash_table_u64_create(NULL); simple_mtx_init(&device->rra_trace.data_mtx, mtx_plain); - device->rra_trace.copy_memory_index = - radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT); + device->rra_trace.copy_memory_index = radv_find_memory_index( + device->physical_device, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT); } void @@ -1036,8 +1010,7 @@ rra_copy_context_init(struct rra_copy_context *ctx) if (result != VK_SUCCESS) goto fail_buffer; - result = - vk_common_MapMemory(ctx->device, ctx->memory, 0, VK_WHOLE_SIZE, 0, (void **)&ctx->mapped_data); + result = vk_common_MapMemory(ctx->device, ctx->memory, 0, VK_WHOLE_SIZE, 0, (void **)&ctx->mapped_data); if (result != VK_SUCCESS) goto fail_memory; @@ -1186,9 +1159,7 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename) uint64_t written_accel_struct_count = 0; struct hash_entry *last_entry = NULL; - for (unsigned i = 0; - (last_entry = _mesa_hash_table_next_entry(device->rra_trace.accel_structs, last_entry)); - ++i) + for (unsigned i = 0; (last_entry = _mesa_hash_table_next_entry(device->rra_trace.accel_structs, last_entry)); ++i) hash_entries[i] = last_entry; qsort(hash_entries, struct_count, sizeof(*hash_entries), accel_struct_entry_cmp); @@ -1215,9 +1186,8 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename) continue; accel_struct_offsets[written_accel_struct_count] = (uint64_t)ftell(file); - result = - rra_dump_acceleration_structure(data, mapped_data, device->rra_trace.accel_struct_vas, - device->rra_trace.validate_as, file); + result = rra_dump_acceleration_structure(data, mapped_data, device->rra_trace.accel_struct_vas, + device->rra_trace.validate_as, file); rra_unmap_accel_struct_data(©_ctx, i); @@ -1228,8 +1198,7 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename) rra_copy_context_finish(©_ctx); uint64_t chunk_info_offset = (uint64_t)ftell(file); - rra_dump_chunk_description(api_info_offset, 0, 8, "ApiInfo", RADV_RRA_CHUNK_ID_ASIC_API_INFO, - file); + rra_dump_chunk_description(api_info_offset, 0, 8, "ApiInfo", RADV_RRA_CHUNK_ID_ASIC_API_INFO, file); rra_dump_chunk_description(asic_info_offset, 0, sizeof(struct rra_asic_info), "AsicInfo", RADV_RRA_CHUNK_ID_ASIC_API_INFO, file); @@ -1240,9 +1209,8 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename) else accel_struct_size = (uint64_t)(accel_struct_offsets[i + 1] - accel_struct_offsets[i]); - rra_dump_chunk_description(accel_struct_offsets[i], - sizeof(struct rra_accel_struct_chunk_header), accel_struct_size, - 
"RawAccelStruct", RADV_RRA_CHUNK_ID_ACCEL_STRUCT, file); + rra_dump_chunk_description(accel_struct_offsets[i], sizeof(struct rra_accel_struct_chunk_header), + accel_struct_size, "RawAccelStruct", RADV_RRA_CHUNK_ID_ACCEL_STRUCT, file); } uint64_t file_end = (uint64_t)ftell(file); diff --git a/src/amd/vulkan/radv_rt_common.c b/src/amd/vulkan/radv_rt_common.c index 1bab37f..26c4d68 100644 --- a/src/amd/vulkan/radv_rt_common.c +++ b/src/amd/vulkan/radv_rt_common.c @@ -29,8 +29,8 @@ #include #endif -static nir_ssa_def *build_node_to_addr(struct radv_device *device, nir_builder *b, - nir_ssa_def *node, bool skip_type_and); +static nir_ssa_def *build_node_to_addr(struct radv_device *device, nir_builder *b, nir_ssa_def *node, + bool skip_type_and); bool radv_enable_rt(const struct radv_physical_device *pdevice, bool rt_pipelines) @@ -62,26 +62,24 @@ radv_emulate_rt(const struct radv_physical_device *pdevice) } void -nir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var_indices, - uint32_t chan_1, uint32_t chan_2) +nir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var_indices, uint32_t chan_1, + uint32_t chan_2) { nir_ssa_def *ssa_distances = nir_load_var(b, var_distances); nir_ssa_def *ssa_indices = nir_load_var(b, var_indices); /* if (distances[chan_2] < distances[chan_1]) { */ - nir_push_if( - b, nir_flt(b, nir_channel(b, ssa_distances, chan_2), nir_channel(b, ssa_distances, chan_1))); + nir_push_if(b, nir_flt(b, nir_channel(b, ssa_distances, chan_2), nir_channel(b, ssa_distances, chan_1))); { /* swap(distances[chan_2], distances[chan_1]); */ - nir_ssa_def *new_distances[4] = {nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), - nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32)}; - nir_ssa_def *new_indices[4] = {nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), - nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32)}; + nir_ssa_def *new_distances[4] = {nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), + nir_ssa_undef(b, 1, 32)}; + nir_ssa_def *new_indices[4] = {nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), + nir_ssa_undef(b, 1, 32)}; new_distances[chan_2] = nir_channel(b, ssa_distances, chan_1); new_distances[chan_1] = nir_channel(b, ssa_distances, chan_2); new_indices[chan_2] = nir_channel(b, ssa_indices, chan_1); new_indices[chan_1] = nir_channel(b, ssa_indices, chan_2); - nir_store_var(b, var_distances, nir_vec(b, new_distances, 4), - (1u << chan_1) | (1u << chan_2)); + nir_store_var(b, var_distances, nir_vec(b, new_distances, 4), (1u << chan_1) | (1u << chan_2)); nir_store_var(b, var_indices, nir_vec(b, new_indices, 4), (1u << chan_1) | (1u << chan_2)); } /* } */ @@ -89,9 +87,8 @@ nir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var } nir_ssa_def * -intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node, - nir_ssa_def *ray_tmax, nir_ssa_def *origin, nir_ssa_def *dir, - nir_ssa_def *inv_dir) +intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node, nir_ssa_def *ray_tmax, + nir_ssa_def *origin, nir_ssa_def *dir, nir_ssa_def *inv_dir) { const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4); const struct glsl_type *uvec4_type = glsl_vector_type(GLSL_TYPE_UINT, 4); @@ -99,15 +96,12 @@ intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_s nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node, false); /* vec4 distances = vec4(INF, INF, 
INF, INF); */ - nir_variable *distances = - nir_variable_create(b->shader, nir_var_shader_temp, vec4_type, "distances"); + nir_variable *distances = nir_variable_create(b->shader, nir_var_shader_temp, vec4_type, "distances"); nir_store_var(b, distances, nir_imm_vec4(b, INFINITY, INFINITY, INFINITY, INFINITY), 0xf); /* uvec4 child_indices = uvec4(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff); */ - nir_variable *child_indices = - nir_variable_create(b->shader, nir_var_shader_temp, uvec4_type, "child_indices"); - nir_store_var(b, child_indices, - nir_imm_ivec4(b, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu), 0xf); + nir_variable *child_indices = nir_variable_create(b->shader, nir_var_shader_temp, uvec4_type, "child_indices"); + nir_store_var(b, child_indices, nir_imm_ivec4(b, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu), 0xf); /* Need to remove infinities here because otherwise we get nasty NaN propagation * if the direction has 0s in it. */ @@ -122,15 +116,14 @@ intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_s }; /* node->children[i] -> uint */ - nir_ssa_def *child_index = - nir_build_load_global(b, 1, 32, nir_iadd_imm(b, node_addr, child_offset), .align_mul = 64, - .align_offset = child_offset % 64); + nir_ssa_def *child_index = nir_build_load_global(b, 1, 32, nir_iadd_imm(b, node_addr, child_offset), + .align_mul = 64, .align_offset = child_offset % 64); /* node->coords[i][0], node->coords[i][1] -> vec3 */ nir_ssa_def *node_coords[2] = { - nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[0]), - .align_mul = 64, .align_offset = coord_offsets[0] % 64), - nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[1]), - .align_mul = 64, .align_offset = coord_offsets[1] % 64), + nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[0]), .align_mul = 64, + .align_offset = coord_offsets[0] % 64), + nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[1]), .align_mul = 64, + .align_offset = coord_offsets[1] % 64), }; /* If x of the aabb min is NaN, then this is an inactive aabb. 
@@ -138,8 +131,7 @@ intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_s * https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap36.html#VkAabbPositionsKHR */ nir_ssa_def *min_x = nir_channel(b, node_coords[0], 0); - nir_ssa_def *min_x_is_not_nan = - nir_inot(b, nir_fneu(b, min_x, min_x)); /* NaN != NaN -> true */ + nir_ssa_def *min_x_is_not_nan = nir_inot(b, nir_fneu(b, min_x, min_x)); /* NaN != NaN -> true */ /* vec3 bound0 = (node->coords[i][0] - origin) * inv_dir; */ nir_ssa_def *bound0 = nir_fmul(b, nir_fsub(b, node_coords[0], origin), inv_dir); @@ -148,25 +140,22 @@ intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_s /* float tmin = max(max(min(bound0.x, bound1.x), min(bound0.y, bound1.y)), min(bound0.z, * bound1.z)); */ - nir_ssa_def *tmin = - nir_fmax(b, - nir_fmax(b, nir_fmin(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)), - nir_fmin(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))), - nir_fmin(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2))); + nir_ssa_def *tmin = nir_fmax(b, + nir_fmax(b, nir_fmin(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)), + nir_fmin(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))), + nir_fmin(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2))); /* float tmax = min(min(max(bound0.x, bound1.x), max(bound0.y, bound1.y)), max(bound0.z, * bound1.z)); */ - nir_ssa_def *tmax = - nir_fmin(b, - nir_fmin(b, nir_fmax(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)), - nir_fmax(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))), - nir_fmax(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2))); + nir_ssa_def *tmax = nir_fmin(b, + nir_fmin(b, nir_fmax(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)), + nir_fmax(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))), + nir_fmax(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2))); /* if (!isnan(node->coords[i][0].x) && tmax >= max(0.0f, tmin) && tmin < ray_tmax) { */ - nir_push_if(b, - nir_iand(b, min_x_is_not_nan, - nir_iand(b, nir_fge(b, tmax, nir_fmax(b, nir_imm_float(b, 0.0f), tmin)), - nir_flt(b, tmin, ray_tmax)))); + nir_push_if(b, nir_iand(b, min_x_is_not_nan, + nir_iand(b, nir_fge(b, tmax, nir_fmax(b, nir_imm_float(b, 0.0f), tmin)), + nir_flt(b, tmin, ray_tmax)))); { /* child_indices[i] = node->children[i]; */ nir_ssa_def *new_child_indices[4] = {child_index, child_index, child_index, child_index}; @@ -191,9 +180,8 @@ intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_s } nir_ssa_def * -intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node, - nir_ssa_def *ray_tmax, nir_ssa_def *origin, nir_ssa_def *dir, - nir_ssa_def *inv_dir) +intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node, nir_ssa_def *ray_tmax, + nir_ssa_def *origin, nir_ssa_def *dir, nir_ssa_def *inv_dir) { const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4); @@ -230,10 +218,10 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_s nir_channel(b, abs_dir, 2), }; /* Find index of greatest value of abs_dir and put that as kz. 
*/ - nir_ssa_def *kz = nir_bcsel( - b, nir_fge(b, abs_dirs[0], abs_dirs[1]), - nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[2]), nir_imm_int(b, 0), nir_imm_int(b, 2)), - nir_bcsel(b, nir_fge(b, abs_dirs[1], abs_dirs[2]), nir_imm_int(b, 1), nir_imm_int(b, 2))); + nir_ssa_def *kz = + nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[1]), + nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[2]), nir_imm_int(b, 0), nir_imm_int(b, 2)), + nir_bcsel(b, nir_fge(b, abs_dirs[1], abs_dirs[2]), nir_imm_int(b, 1), nir_imm_int(b, 2))); nir_ssa_def *kx = nir_imod_imm(b, nir_iadd_imm(b, kz, 1), 3); nir_ssa_def *ky = nir_imod_imm(b, nir_iadd_imm(b, kx, 1), 3); nir_ssa_def *k_indices[3] = {kx, ky, kz}; @@ -241,8 +229,7 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_s /* Swap kx and ky dimensions to preserve winding order */ unsigned swap_xy_swizzle[4] = {1, 0, 2, 3}; - k = nir_bcsel(b, nir_flt_imm(b, nir_vector_extract(b, dir, kz), 0.0f), - nir_swizzle(b, k, swap_xy_swizzle, 3), k); + k = nir_bcsel(b, nir_flt_imm(b, nir_vector_extract(b, dir, kz), 0.0f), nir_swizzle(b, k, swap_xy_swizzle, 3), k); kx = nir_channel(b, k, 0); ky = nir_channel(b, k, 1); @@ -259,29 +246,20 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_s nir_ssa_def *v_c = nir_fsub(b, node_coords[2], origin); /* Perform shear and scale */ - nir_ssa_def *ax = - nir_fsub(b, nir_vector_extract(b, v_a, kx), nir_fmul(b, sx, nir_vector_extract(b, v_a, kz))); - nir_ssa_def *ay = - nir_fsub(b, nir_vector_extract(b, v_a, ky), nir_fmul(b, sy, nir_vector_extract(b, v_a, kz))); - nir_ssa_def *bx = - nir_fsub(b, nir_vector_extract(b, v_b, kx), nir_fmul(b, sx, nir_vector_extract(b, v_b, kz))); - nir_ssa_def *by = - nir_fsub(b, nir_vector_extract(b, v_b, ky), nir_fmul(b, sy, nir_vector_extract(b, v_b, kz))); - nir_ssa_def *cx = - nir_fsub(b, nir_vector_extract(b, v_c, kx), nir_fmul(b, sx, nir_vector_extract(b, v_c, kz))); - nir_ssa_def *cy = - nir_fsub(b, nir_vector_extract(b, v_c, ky), nir_fmul(b, sy, nir_vector_extract(b, v_c, kz))); + nir_ssa_def *ax = nir_fsub(b, nir_vector_extract(b, v_a, kx), nir_fmul(b, sx, nir_vector_extract(b, v_a, kz))); + nir_ssa_def *ay = nir_fsub(b, nir_vector_extract(b, v_a, ky), nir_fmul(b, sy, nir_vector_extract(b, v_a, kz))); + nir_ssa_def *bx = nir_fsub(b, nir_vector_extract(b, v_b, kx), nir_fmul(b, sx, nir_vector_extract(b, v_b, kz))); + nir_ssa_def *by = nir_fsub(b, nir_vector_extract(b, v_b, ky), nir_fmul(b, sy, nir_vector_extract(b, v_b, kz))); + nir_ssa_def *cx = nir_fsub(b, nir_vector_extract(b, v_c, kx), nir_fmul(b, sx, nir_vector_extract(b, v_c, kz))); + nir_ssa_def *cy = nir_fsub(b, nir_vector_extract(b, v_c, ky), nir_fmul(b, sy, nir_vector_extract(b, v_c, kz))); nir_ssa_def *u = nir_fsub(b, nir_fmul(b, cx, by), nir_fmul(b, cy, bx)); nir_ssa_def *v = nir_fsub(b, nir_fmul(b, ax, cy), nir_fmul(b, ay, cx)); nir_ssa_def *w = nir_fsub(b, nir_fmul(b, bx, ay), nir_fmul(b, by, ax)); - nir_variable *u_var = - nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "u"); - nir_variable *v_var = - nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "v"); - nir_variable *w_var = - nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "w"); + nir_variable *u_var = nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "u"); + nir_variable *v_var = nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "v"); + nir_variable *w_var = nir_variable_create(b->shader, nir_var_shader_temp, 
glsl_float_type(), "w"); nir_store_var(b, u_var, u, 0x1); nir_store_var(b, v_var, v, 0x1); nir_store_var(b, w_var, w, 0x1); @@ -291,9 +269,8 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_s * The Vulkan spec states it only needs single precision watertightness * but we fail dEQP-VK.ray_tracing_pipeline.watertightness.closedFan2.1024 with * failures = 1 without doing this. :( */ - nir_ssa_def *cond_retest = nir_ior( - b, nir_ior(b, nir_feq_imm(b, u, 0.0f), nir_feq_imm(b, v, 0.0f)), - nir_feq_imm(b, w, 0.0f)); + nir_ssa_def *cond_retest = + nir_ior(b, nir_ior(b, nir_feq_imm(b, u, 0.0f), nir_feq_imm(b, v, 0.0f)), nir_feq_imm(b, w, 0.0f)); nir_push_if(b, cond_retest); { @@ -304,12 +281,9 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_s cx = nir_f2f64(b, cx); cy = nir_f2f64(b, cy); - nir_store_var(b, u_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, cx, by), nir_fmul(b, cy, bx))), - 0x1); - nir_store_var(b, v_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, ax, cy), nir_fmul(b, ay, cx))), - 0x1); - nir_store_var(b, w_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, bx, ay), nir_fmul(b, by, ax))), - 0x1); + nir_store_var(b, u_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, cx, by), nir_fmul(b, cy, bx))), 0x1); + nir_store_var(b, v_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, ax, cy), nir_fmul(b, ay, cx))), 0x1); + nir_store_var(b, w_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, bx, ay), nir_fmul(b, by, ax))), 0x1); } nir_pop_if(b, NULL); @@ -318,13 +292,11 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_s w = nir_load_var(b, w_var); /* Perform edge tests. */ - nir_ssa_def *cond_back = nir_ior( - b, nir_ior(b, nir_flt_imm(b, u, 0.0f), nir_flt_imm(b, v, 0.0f)), - nir_flt_imm(b, w, 0.0f)); + nir_ssa_def *cond_back = + nir_ior(b, nir_ior(b, nir_flt_imm(b, u, 0.0f), nir_flt_imm(b, v, 0.0f)), nir_flt_imm(b, w, 0.0f)); - nir_ssa_def *cond_front = nir_ior( - b, nir_ior(b, nir_fgt_imm(b, u, 0.0f), nir_fgt_imm(b, v, 0.0f)), - nir_flt(b, nir_imm_float(b, 0.0f), w)); + nir_ssa_def *cond_front = + nir_ior(b, nir_ior(b, nir_fgt_imm(b, u, 0.0f), nir_fgt_imm(b, v, 0.0f)), nir_flt(b, nir_imm_float(b, 0.0f), w)); nir_ssa_def *cond = nir_inot(b, nir_iand(b, cond_back, cond_front)); @@ -336,8 +308,7 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_s nir_ssa_def *bz = nir_fmul(b, sz, nir_vector_extract(b, v_b, kz)); nir_ssa_def *cz = nir_fmul(b, sz, nir_vector_extract(b, v_c, kz)); - nir_ssa_def *t = - nir_fadd(b, nir_fadd(b, nir_fmul(b, u, az), nir_fmul(b, v, bz)), nir_fmul(b, w, cz)); + nir_ssa_def *t = nir_fadd(b, nir_fadd(b, nir_fmul(b, u, az), nir_fmul(b, v, bz)), nir_fmul(b, w, cz)); nir_ssa_def *t_signed = nir_fmul(b, nir_fsign(b, det), t); @@ -364,16 +335,13 @@ build_addr_to_node(nir_builder *b, nir_ssa_def *addr) } static nir_ssa_def * -build_node_to_addr(struct radv_device *device, nir_builder *b, nir_ssa_def *node, - bool skip_type_and) +build_node_to_addr(struct radv_device *device, nir_builder *b, nir_ssa_def *node, bool skip_type_and) { nir_ssa_def *addr = skip_type_and ? node : nir_iand_imm(b, node, ~7ull); addr = nir_ishl_imm(b, addr, 3); /* Assumes everything is in the top half of address space, which is true in * GFX9+ for now. */ - return device->physical_device->rad_info.gfx_level >= GFX9 - ? nir_ior_imm(b, addr, 0xffffull << 48) - : addr; + return device->physical_device->rad_info.gfx_level >= GFX9 ? 
nir_ior_imm(b, addr, 0xffffull << 48) : addr; } nir_ssa_def * @@ -386,8 +354,7 @@ nir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[], }; for (unsigned i = 0; i < 3; ++i) { for (unsigned j = 0; j < 3; ++j) { - nir_ssa_def *v = - nir_fmul(b, nir_channels(b, vec, 1 << j), nir_channels(b, matrix[i], 1 << j)); + nir_ssa_def *v = nir_fmul(b, nir_channels(b, vec, 1 << j), nir_channels(b, matrix[i], 1 << j)); result_components[i] = (translation || j) ? nir_fadd(b, result_components[i], v) : v; } } @@ -399,20 +366,19 @@ nir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_de { unsigned offset = offsetof(struct radv_bvh_instance_node, wto_matrix); for (unsigned i = 0; i < 3; ++i) { - out[i] = nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_addr, offset + i * 16), - .align_mul = 64, .align_offset = offset + i * 16); + out[i] = nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_addr, offset + i * 16), .align_mul = 64, + .align_offset = offset + i * 16); } } /* When a hit is opaque the any_hit shader is skipped for this hit and the hit * is assumed to be an actual hit. */ static nir_ssa_def * -hit_is_opaque(nir_builder *b, nir_ssa_def *sbt_offset_and_flags, - const struct radv_ray_flags *ray_flags, nir_ssa_def *geometry_id_and_flags) +hit_is_opaque(nir_builder *b, nir_ssa_def *sbt_offset_and_flags, const struct radv_ray_flags *ray_flags, + nir_ssa_def *geometry_id_and_flags) { - nir_ssa_def *opaque = - nir_uge_imm(b, nir_ior(b, geometry_id_and_flags, sbt_offset_and_flags), - RADV_INSTANCE_FORCE_OPAQUE | RADV_INSTANCE_NO_FORCE_NOT_OPAQUE); + nir_ssa_def *opaque = nir_uge_imm(b, nir_ior(b, geometry_id_and_flags, sbt_offset_and_flags), + RADV_INSTANCE_FORCE_OPAQUE | RADV_INSTANCE_NO_FORCE_NOT_OPAQUE); opaque = nir_bcsel(b, ray_flags->force_opaque, nir_imm_true(b), opaque); opaque = nir_bcsel(b, ray_flags->force_not_opaque, nir_imm_false(b), opaque); return opaque; @@ -425,16 +391,13 @@ create_bvh_descriptor(nir_builder *b) * use the same descriptor, which avoids divergence when different rays hit different * instances at the cost of having to use 64-bit node ids. 
*/ const uint64_t bvh_size = 1ull << 42; - return nir_imm_ivec4( - b, 0, 1u << 31 /* Enable box sorting */, (bvh_size - 1) & 0xFFFFFFFFu, - ((bvh_size - 1) >> 32) | (1u << 24 /* Return IJ for triangles */) | (1u << 31)); + return nir_imm_ivec4(b, 0, 1u << 31 /* Enable box sorting */, (bvh_size - 1) & 0xFFFFFFFFu, + ((bvh_size - 1) >> 32) | (1u << 24 /* Return IJ for triangles */) | (1u << 31)); } static void -insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, - const struct radv_ray_traversal_args *args, - const struct radv_ray_flags *ray_flags, nir_ssa_def *result, - nir_ssa_def *bvh_node) +insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args, + const struct radv_ray_flags *ray_flags, nir_ssa_def *result, nir_ssa_def *bvh_node) { if (!args->triangle_cb) return; @@ -447,19 +410,18 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, nir_push_if(b, nir_flt(b, intersection.t, nir_load_deref(b, args->vars.tmax))); { intersection.frontface = nir_fgt_imm(b, div, 0); - nir_ssa_def *switch_ccw = nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), - RADV_INSTANCE_TRIANGLE_FLIP_FACING); + nir_ssa_def *switch_ccw = + nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), RADV_INSTANCE_TRIANGLE_FLIP_FACING); intersection.frontface = nir_ixor(b, intersection.frontface, switch_ccw); nir_ssa_def *not_cull = ray_flags->no_skip_triangles; nir_ssa_def *not_facing_cull = nir_bcsel(b, intersection.frontface, ray_flags->no_cull_front, ray_flags->no_cull_back); - not_cull = - nir_iand(b, not_cull, - nir_ior(b, not_facing_cull, - nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), - RADV_INSTANCE_TRIANGLE_FACING_CULL_DISABLE))); + not_cull = nir_iand(b, not_cull, + nir_ior(b, not_facing_cull, + nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), + RADV_INSTANCE_TRIANGLE_FACING_CULL_DISABLE))); nir_push_if(b, nir_iand(b, @@ -468,21 +430,17 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, intersection.base.node_addr = build_node_to_addr(device, b, bvh_node, false); nir_ssa_def *triangle_info = nir_build_load_global( b, 2, 32, - nir_iadd_imm(b, intersection.base.node_addr, - offsetof(struct radv_bvh_triangle_node, triangle_id))); + nir_iadd_imm(b, intersection.base.node_addr, offsetof(struct radv_bvh_triangle_node, triangle_id))); intersection.base.primitive_id = nir_channel(b, triangle_info, 0); intersection.base.geometry_id_and_flags = nir_channel(b, triangle_info, 1); - intersection.base.opaque = - hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), ray_flags, - intersection.base.geometry_id_and_flags); + intersection.base.opaque = hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), ray_flags, + intersection.base.geometry_id_and_flags); - not_cull = nir_bcsel(b, intersection.base.opaque, ray_flags->no_cull_opaque, - ray_flags->no_cull_no_opaque); + not_cull = nir_bcsel(b, intersection.base.opaque, ray_flags->no_cull_opaque, ray_flags->no_cull_no_opaque); nir_push_if(b, not_cull); { nir_ssa_def *divs[2] = {div, div}; - intersection.barycentrics = - nir_fdiv(b, nir_channels(b, result, 0xc), nir_vec(b, divs, 2)); + intersection.barycentrics = nir_fdiv(b, nir_channels(b, result, 0xc), nir_vec(b, divs, 2)); args->triangle_cb(b, &intersection, args, ray_flags); } @@ -494,8 +452,7 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, } static void 
-insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, - const struct radv_ray_traversal_args *args, +insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args, const struct radv_ray_flags *ray_flags, nir_ssa_def *bvh_node) { if (!args->aabb_cb) @@ -504,15 +461,13 @@ insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, struct radv_leaf_intersection intersection; intersection.node_addr = build_node_to_addr(device, b, bvh_node, false); nir_ssa_def *triangle_info = nir_build_load_global( - b, 2, 32, - nir_iadd_imm(b, intersection.node_addr, offsetof(struct radv_bvh_aabb_node, primitive_id))); + b, 2, 32, nir_iadd_imm(b, intersection.node_addr, offsetof(struct radv_bvh_aabb_node, primitive_id))); intersection.primitive_id = nir_channel(b, triangle_info, 0); intersection.geometry_id_and_flags = nir_channel(b, triangle_info, 1); - intersection.opaque = hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), - ray_flags, intersection.geometry_id_and_flags); + intersection.opaque = hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), ray_flags, + intersection.geometry_id_and_flags); - nir_ssa_def *not_cull = - nir_bcsel(b, intersection.opaque, ray_flags->no_cull_opaque, ray_flags->no_cull_no_opaque); + nir_ssa_def *not_cull = nir_bcsel(b, intersection.opaque, ray_flags->no_cull_opaque, ray_flags->no_cull_no_opaque); not_cull = nir_iand(b, not_cull, ray_flags->no_skip_aabbs); nir_push_if(b, not_cull); { @@ -530,8 +485,7 @@ fetch_parent_node(nir_builder *b, nir_ssa_def *bvh, nir_ssa_def *node) } nir_ssa_def * -radv_build_ray_traversal(struct radv_device *device, nir_builder *b, - const struct radv_ray_traversal_args *args) +radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args) { nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete"); nir_store_var(b, incomplete, nir_imm_true(b), 0x1); @@ -542,49 +496,37 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, struct radv_ray_flags ray_flags = { .force_opaque = nir_test_mask(b, args->flags, SpvRayFlagsOpaqueKHRMask), .force_not_opaque = nir_test_mask(b, args->flags, SpvRayFlagsNoOpaqueKHRMask), - .terminate_on_first_hit = - nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask), - .no_cull_front = nir_ieq_imm( - b, nir_iand_imm(b, args->flags, SpvRayFlagsCullFrontFacingTrianglesKHRMask), 0), - .no_cull_back = - nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullBackFacingTrianglesKHRMask), 0), - .no_cull_opaque = - nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullOpaqueKHRMask), 0), - .no_cull_no_opaque = - nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullNoOpaqueKHRMask), 0), - .no_skip_triangles = - nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipTrianglesKHRMask), 0), + .terminate_on_first_hit = nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask), + .no_cull_front = nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullFrontFacingTrianglesKHRMask), 0), + .no_cull_back = nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullBackFacingTrianglesKHRMask), 0), + .no_cull_opaque = nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullOpaqueKHRMask), 0), + .no_cull_no_opaque = nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullNoOpaqueKHRMask), 0), + .no_skip_triangles = nir_ieq_imm(b, nir_iand_imm(b, args->flags, 
SpvRayFlagsSkipTrianglesKHRMask), 0), .no_skip_aabbs = nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipAABBsKHRMask), 0), }; nir_push_loop(b); { - nir_push_if( - b, nir_ieq_imm(b, nir_load_deref(b, args->vars.current_node), RADV_BVH_INVALID_NODE)); + nir_push_if(b, nir_ieq_imm(b, nir_load_deref(b, args->vars.current_node), RADV_BVH_INVALID_NODE)); { /* Early exit if we never overflowed the stack, to avoid having to backtrack to * the root for no reason. */ - nir_push_if(b, nir_ilt_imm(b, nir_load_deref(b, args->vars.stack), - args->stack_base + args->stack_stride)); + nir_push_if(b, nir_ilt_imm(b, nir_load_deref(b, args->vars.stack), args->stack_base + args->stack_stride)); { nir_store_var(b, incomplete, nir_imm_false(b), 0x1); nir_jump(b, nir_jump_break); } nir_pop_if(b, NULL); - nir_ssa_def *stack_instance_exit = nir_ige(b, nir_load_deref(b, args->vars.top_stack), - nir_load_deref(b, args->vars.stack)); + nir_ssa_def *stack_instance_exit = + nir_ige(b, nir_load_deref(b, args->vars.top_stack), nir_load_deref(b, args->vars.stack)); nir_ssa_def *root_instance_exit = - nir_ieq(b, nir_load_deref(b, args->vars.previous_node), - nir_load_deref(b, args->vars.instance_bottom_node)); - nir_if *instance_exit = - nir_push_if(b, nir_ior(b, stack_instance_exit, root_instance_exit)); + nir_ieq(b, nir_load_deref(b, args->vars.previous_node), nir_load_deref(b, args->vars.instance_bottom_node)); + nir_if *instance_exit = nir_push_if(b, nir_ior(b, stack_instance_exit, root_instance_exit)); instance_exit->control = nir_selection_control_dont_flatten; { nir_store_deref(b, args->vars.top_stack, nir_imm_int(b, -1), 1); - nir_store_deref(b, args->vars.previous_node, - nir_load_deref(b, args->vars.instance_top_node), 1); - nir_store_deref(b, args->vars.instance_bottom_node, - nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT), 1); + nir_store_deref(b, args->vars.previous_node, nir_load_deref(b, args->vars.instance_top_node), 1); + nir_store_deref(b, args->vars.instance_bottom_node, nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT), 1); nir_store_deref(b, args->vars.bvh_base, args->root_bvh_base, 1); nir_store_deref(b, args->vars.origin, args->origin, 7); @@ -593,12 +535,11 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, } nir_pop_if(b, NULL); - nir_push_if(b, nir_ige(b, nir_load_deref(b, args->vars.stack_low_watermark), - nir_load_deref(b, args->vars.stack))); + nir_push_if( + b, nir_ige(b, nir_load_deref(b, args->vars.stack_low_watermark), nir_load_deref(b, args->vars.stack))); { nir_ssa_def *prev = nir_load_deref(b, args->vars.previous_node); - nir_ssa_def *bvh_addr = - build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true); + nir_ssa_def *bvh_addr = build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true); nir_ssa_def *parent = fetch_parent_node(b, bvh_addr, prev); nir_push_if(b, nir_ieq_imm(b, parent, RADV_BVH_INVALID_NODE)); @@ -611,17 +552,14 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, } nir_push_else(b, NULL); { - nir_store_deref( - b, args->vars.stack, - nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), -args->stack_stride), 1); + nir_store_deref(b, args->vars.stack, + nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), -args->stack_stride), 1); nir_ssa_def *stack_ptr = - nir_umod_imm(b, nir_load_deref(b, args->vars.stack), - args->stack_stride * args->stack_entries); + nir_umod_imm(b, nir_load_deref(b, args->vars.stack), args->stack_stride * args->stack_entries); nir_ssa_def *bvh_node = args->stack_load_cb(b, 
stack_ptr, args); nir_store_deref(b, args->vars.current_node, bvh_node, 0x1); - nir_store_deref(b, args->vars.previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), - 0x1); + nir_store_deref(b, args->vars.previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1); } nir_pop_if(b, NULL); } @@ -637,15 +575,14 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, nir_store_deref(b, args->vars.previous_node, bvh_node, 0x1); nir_store_deref(b, args->vars.current_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1); - nir_ssa_def *global_bvh_node = - nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node)); + nir_ssa_def *global_bvh_node = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node)); nir_ssa_def *intrinsic_result = NULL; if (!radv_emulate_rt(device->physical_device)) { - intrinsic_result = nir_bvh64_intersect_ray_amd( - b, 32, desc, nir_unpack_64_2x32(b, global_bvh_node), nir_load_deref(b, args->vars.tmax), - nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir), - nir_load_deref(b, args->vars.inv_dir)); + intrinsic_result = + nir_bvh64_intersect_ray_amd(b, 32, desc, nir_unpack_64_2x32(b, global_bvh_node), + nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin), + nir_load_deref(b, args->vars.dir), nir_load_deref(b, args->vars.inv_dir)); } nir_ssa_def *node_type = nir_iand_imm(b, bvh_node, 7); @@ -660,45 +597,36 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, nir_push_else(b, NULL); { /* instance */ - nir_ssa_def *instance_node_addr = - build_node_to_addr(device, b, global_bvh_node, false); + nir_ssa_def *instance_node_addr = build_node_to_addr(device, b, global_bvh_node, false); nir_store_deref(b, args->vars.instance_addr, instance_node_addr, 1); - nir_ssa_def *instance_data = nir_build_load_global( - b, 4, 32, instance_node_addr, .align_mul = 64, .align_offset = 0); + nir_ssa_def *instance_data = + nir_build_load_global(b, 4, 32, instance_node_addr, .align_mul = 64, .align_offset = 0); nir_ssa_def *wto_matrix[3]; nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix); - nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3), - 1); + nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3), 1); nir_ssa_def *instance_and_mask = nir_channel(b, instance_data, 2); - nir_push_if(b, nir_ult(b, nir_iand(b, instance_and_mask, args->cull_mask), - nir_imm_int(b, 1 << 24))); + nir_push_if(b, nir_ult(b, nir_iand(b, instance_and_mask, args->cull_mask), nir_imm_int(b, 1 << 24))); { nir_jump(b, nir_jump_continue); } nir_pop_if(b, NULL); nir_store_deref(b, args->vars.top_stack, nir_load_deref(b, args->vars.stack), 1); - nir_store_deref(b, args->vars.bvh_base, - nir_pack_64_2x32(b, nir_trim_vector(b, instance_data, 2)), - 1); + nir_store_deref(b, args->vars.bvh_base, nir_pack_64_2x32(b, nir_trim_vector(b, instance_data, 2)), 1); /* Push the instance root node onto the stack */ nir_store_deref(b, args->vars.current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE), 0x1); - nir_store_deref(b, args->vars.instance_bottom_node, - nir_imm_int(b, RADV_BVH_ROOT_NODE), 1); + nir_store_deref(b, args->vars.instance_bottom_node, nir_imm_int(b, RADV_BVH_ROOT_NODE), 1); nir_store_deref(b, args->vars.instance_top_node, bvh_node, 1); /* Transform the ray into object space */ - nir_store_deref(b, args->vars.origin, - nir_build_vec3_mat_mult(b, args->origin, wto_matrix, true), 7); - nir_store_deref(b, args->vars.dir, - nir_build_vec3_mat_mult(b, 
args->dir, wto_matrix, false), 7); - nir_store_deref(b, args->vars.inv_dir, - nir_fdiv(b, vec3ones, nir_load_deref(b, args->vars.dir)), 7); + nir_store_deref(b, args->vars.origin, nir_build_vec3_mat_mult(b, args->origin, wto_matrix, true), 7); + nir_store_deref(b, args->vars.dir, nir_build_vec3_mat_mult(b, args->dir, wto_matrix, false), 7); + nir_store_deref(b, args->vars.inv_dir, nir_fdiv(b, vec3ones, nir_load_deref(b, args->vars.dir)), 7); } nir_pop_if(b, NULL); } @@ -709,9 +637,8 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, /* If we didn't run the intrinsic cause the hardware didn't support it, * emulate ray/box intersection here */ result = intersect_ray_amd_software_box( - device, b, global_bvh_node, nir_load_deref(b, args->vars.tmax), - nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir), - nir_load_deref(b, args->vars.inv_dir)); + device, b, global_bvh_node, nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin), + nir_load_deref(b, args->vars.dir), nir_load_deref(b, args->vars.inv_dir)); } /* box */ @@ -726,18 +653,14 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, for (unsigned i = 4; i-- > 1;) { nir_ssa_def *stack = nir_load_deref(b, args->vars.stack); - nir_ssa_def *stack_ptr = - nir_umod_imm(b, stack, args->stack_entries * args->stack_stride); + nir_ssa_def *stack_ptr = nir_umod_imm(b, stack, args->stack_entries * args->stack_stride); args->stack_store_cb(b, stack_ptr, new_nodes[i], args); - nir_store_deref(b, args->vars.stack, nir_iadd_imm(b, stack, args->stack_stride), - 1); + nir_store_deref(b, args->vars.stack, nir_iadd_imm(b, stack, args->stack_stride), 1); if (i == 1) { nir_ssa_def *new_watermark = - nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), - -args->stack_entries * args->stack_stride); - new_watermark = nir_imax(b, nir_load_deref(b, args->vars.stack_low_watermark), - new_watermark); + nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), -args->stack_entries * args->stack_stride); + new_watermark = nir_imax(b, nir_load_deref(b, args->vars.stack_low_watermark), new_watermark); nir_store_deref(b, args->vars.stack_low_watermark, new_watermark, 0x1); } @@ -749,8 +672,8 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, { nir_ssa_def *next = nir_imm_int(b, RADV_BVH_INVALID_NODE); for (unsigned i = 0; i < 3; ++i) { - next = nir_bcsel(b, nir_ieq(b, prev_node, nir_channel(b, result, i)), - nir_channel(b, result, i + 1), next); + next = nir_bcsel(b, nir_ieq(b, prev_node, nir_channel(b, result, i)), nir_channel(b, result, i + 1), + next); } nir_store_deref(b, args->vars.current_node, next, 0x1); } @@ -765,9 +688,8 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, /* If we didn't run the intrinsic cause the hardware didn't support it, * emulate ray/tri intersection here */ result = intersect_ray_amd_software_tri( - device, b, global_bvh_node, nir_load_deref(b, args->vars.tmax), - nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir), - nir_load_deref(b, args->vars.inv_dir)); + device, b, global_bvh_node, nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin), + nir_load_deref(b, args->vars.dir), nir_load_deref(b, args->vars.inv_dir)); } insert_traversal_triangle_case(device, b, args, &ray_flags, result, global_bvh_node); } diff --git a/src/amd/vulkan/radv_rt_common.h b/src/amd/vulkan/radv_rt_common.h index b5e4d92..9e71e5a 100644 --- a/src/amd/vulkan/radv_rt_common.h +++ b/src/amd/vulkan/radv_rt_common.h @@ 
-32,23 +32,20 @@ #include "radv_private.h" -void nir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var_indices, - uint32_t chan_1, uint32_t chan_2); +void nir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var_indices, uint32_t chan_1, + uint32_t chan_2); -nir_ssa_def *intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, - nir_ssa_def *bvh_node, nir_ssa_def *ray_tmax, - nir_ssa_def *origin, nir_ssa_def *dir, +nir_ssa_def *intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node, + nir_ssa_def *ray_tmax, nir_ssa_def *origin, nir_ssa_def *dir, nir_ssa_def *inv_dir); -nir_ssa_def *intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, - nir_ssa_def *bvh_node, nir_ssa_def *ray_tmax, - nir_ssa_def *origin, nir_ssa_def *dir, +nir_ssa_def *intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node, + nir_ssa_def *ray_tmax, nir_ssa_def *origin, nir_ssa_def *dir, nir_ssa_def *inv_dir); nir_ssa_def *build_addr_to_node(nir_builder *b, nir_ssa_def *addr); -nir_ssa_def *nir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[], - bool translation); +nir_ssa_def *nir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[], bool translation); void nir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_def **out); @@ -75,8 +72,7 @@ struct radv_leaf_intersection { nir_ssa_def *opaque; }; -typedef void (*radv_aabb_intersection_cb)(nir_builder *b, - struct radv_leaf_intersection *intersection, +typedef void (*radv_aabb_intersection_cb)(nir_builder *b, struct radv_leaf_intersection *intersection, const struct radv_ray_traversal_args *args); struct radv_triangle_intersection { @@ -87,8 +83,7 @@ struct radv_triangle_intersection { nir_ssa_def *barycentrics; }; -typedef void (*radv_triangle_intersection_cb)(nir_builder *b, - struct radv_triangle_intersection *intersection, +typedef void (*radv_triangle_intersection_cb)(nir_builder *b, struct radv_triangle_intersection *intersection, const struct radv_ray_traversal_args *args, const struct radv_ray_flags *ray_flags); diff --git a/src/amd/vulkan/radv_rt_shader.c b/src/amd/vulkan/radv_rt_shader.c index 3fa6e54..1f16a95 100644 --- a/src/amd/vulkan/radv_rt_shader.c +++ b/src/amd/vulkan/radv_rt_shader.c @@ -132,44 +132,32 @@ create_rt_variables(nir_shader *shader, const VkPipelineCreateFlags flags) .flags = flags, }; vars.idx = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "idx"); - vars.shader_va = - nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "shader_va"); - vars.traversal_addr = - nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "traversal_addr"); + vars.shader_va = nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "shader_va"); + vars.traversal_addr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "traversal_addr"); vars.arg = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "arg"); vars.stack_ptr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "stack_ptr"); - vars.shader_record_ptr = - nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "shader_record_ptr"); + vars.shader_record_ptr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "shader_record_ptr"); const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3); - 
vars.accel_struct = - nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "accel_struct"); - vars.cull_mask_and_flags = - nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "cull_mask_and_flags"); - vars.sbt_offset = - nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "sbt_offset"); - vars.sbt_stride = - nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "sbt_stride"); - vars.miss_index = - nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "miss_index"); + vars.accel_struct = nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "accel_struct"); + vars.cull_mask_and_flags = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "cull_mask_and_flags"); + vars.sbt_offset = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "sbt_offset"); + vars.sbt_stride = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "sbt_stride"); + vars.miss_index = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "miss_index"); vars.origin = nir_variable_create(shader, nir_var_shader_temp, vec3_type, "ray_origin"); vars.tmin = nir_variable_create(shader, nir_var_shader_temp, glsl_float_type(), "ray_tmin"); vars.direction = nir_variable_create(shader, nir_var_shader_temp, vec3_type, "ray_direction"); vars.tmax = nir_variable_create(shader, nir_var_shader_temp, glsl_float_type(), "ray_tmax"); - vars.primitive_id = - nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "primitive_id"); + vars.primitive_id = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "primitive_id"); vars.geometry_id_and_flags = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "geometry_id_and_flags"); - vars.instance_addr = - nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr"); + vars.instance_addr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr"); vars.hit_kind = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "hit_kind"); vars.opaque = nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "opaque"); - vars.ahit_accept = - nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "ahit_accept"); - vars.ahit_terminate = - nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "ahit_terminate"); + vars.ahit_accept = nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "ahit_accept"); + vars.ahit_terminate = nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "ahit_terminate"); return vars; } @@ -178,8 +166,7 @@ create_rt_variables(nir_shader *shader, const VkPipelineCreateFlags flags) * Remap all the variables between the two rt_variables struct for inlining. 
*/ static void -map_rt_variables(struct hash_table *var_remap, struct rt_variables *src, - const struct rt_variables *dst) +map_rt_variables(struct hash_table *var_remap, struct rt_variables *src, const struct rt_variables *dst) { _mesa_hash_table_insert(var_remap, src->idx, dst->idx); _mesa_hash_table_insert(var_remap, src->shader_va, dst->shader_va); @@ -217,20 +204,17 @@ static struct rt_variables create_inner_vars(nir_builder *b, const struct rt_variables *vars) { struct rt_variables inner_vars = *vars; - inner_vars.idx = - nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_idx"); - inner_vars.shader_record_ptr = nir_variable_create( - b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "inner_shader_record_ptr"); + inner_vars.idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_idx"); + inner_vars.shader_record_ptr = + nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "inner_shader_record_ptr"); inner_vars.primitive_id = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_primitive_id"); - inner_vars.geometry_id_and_flags = nir_variable_create( - b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_geometry_id_and_flags"); - inner_vars.tmax = - nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "inner_tmax"); - inner_vars.instance_addr = nir_variable_create(b->shader, nir_var_shader_temp, - glsl_uint64_t_type(), "inner_instance_addr"); - inner_vars.hit_kind = - nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_hit_kind"); + inner_vars.geometry_id_and_flags = + nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_geometry_id_and_flags"); + inner_vars.tmax = nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "inner_tmax"); + inner_vars.instance_addr = + nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "inner_instance_addr"); + inner_vars.hit_kind = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_hit_kind"); return inner_vars; } @@ -239,8 +223,7 @@ static void insert_rt_return(nir_builder *b, const struct rt_variables *vars) { nir_store_var(b, vars->stack_ptr, nir_iadd_imm(b, nir_load_var(b, vars->stack_ptr), -16), 1); - nir_store_var(b, vars->shader_va, - nir_load_scratch(b, 1, 64, nir_load_var(b, vars->stack_ptr), .align_mul = 16), 1); + nir_store_var(b, vars->shader_va, nir_load_scratch(b, 1, 64, nir_load_var(b, vars->stack_ptr), .align_mul = 16), 1); } enum sbt_type { @@ -263,19 +246,17 @@ get_sbt_ptr(nir_builder *b, nir_ssa_def *idx, enum sbt_type binding) { nir_ssa_def *desc_base_addr = nir_load_sbt_base_amd(b); - nir_ssa_def *desc = - nir_pack_64_2x32(b, nir_build_load_smem_amd(b, 2, desc_base_addr, nir_imm_int(b, binding))); + nir_ssa_def *desc = nir_pack_64_2x32(b, nir_build_load_smem_amd(b, 2, desc_base_addr, nir_imm_int(b, binding))); nir_ssa_def *stride_offset = nir_imm_int(b, binding + (binding == SBT_RAYGEN ? 
8 : 16)); - nir_ssa_def *stride = - nir_pack_64_2x32(b, nir_build_load_smem_amd(b, 2, desc_base_addr, stride_offset)); + nir_ssa_def *stride = nir_pack_64_2x32(b, nir_build_load_smem_amd(b, 2, desc_base_addr, stride_offset)); return nir_iadd(b, desc, nir_imul(b, nir_u2u64(b, idx), stride)); } static void -load_sbt_entry(nir_builder *b, const struct rt_variables *vars, nir_ssa_def *idx, - enum sbt_type binding, enum sbt_entry offset) +load_sbt_entry(nir_builder *b, const struct rt_variables *vars, nir_ssa_def *idx, enum sbt_type binding, + enum sbt_entry offset) { nir_ssa_def *addr = get_sbt_ptr(b, idx, binding); nir_ssa_def *load_addr = nir_iadd_imm(b, addr, offset); @@ -309,62 +290,46 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca switch (intr->intrinsic) { case nir_intrinsic_rt_execute_callable: { uint32_t size = align(nir_intrinsic_stack_size(intr), 16); - nir_ssa_def *ret_ptr = - nir_load_resume_shader_address_amd(&b_shader, nir_intrinsic_call_idx(intr)); + nir_ssa_def *ret_ptr = nir_load_resume_shader_address_amd(&b_shader, nir_intrinsic_call_idx(intr)); ret_ptr = nir_ior_imm(&b_shader, ret_ptr, radv_get_rt_priority(shader->info.stage)); - nir_store_var( - &b_shader, vars->stack_ptr, - nir_iadd_imm_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), size), 1); - nir_store_scratch(&b_shader, ret_ptr, nir_load_var(&b_shader, vars->stack_ptr), - .align_mul = 16); + nir_store_var(&b_shader, vars->stack_ptr, + nir_iadd_imm_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), size), 1); + nir_store_scratch(&b_shader, ret_ptr, nir_load_var(&b_shader, vars->stack_ptr), .align_mul = 16); - nir_store_var( - &b_shader, vars->stack_ptr, - nir_iadd_imm_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), 16), 1); + nir_store_var(&b_shader, vars->stack_ptr, + nir_iadd_imm_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), 16), 1); load_sbt_entry(&b_shader, vars, intr->src[0].ssa, SBT_CALLABLE, SBT_RECURSIVE_PTR); - nir_store_var(&b_shader, vars->arg, - nir_iadd_imm(&b_shader, intr->src[1].ssa, -size - 16), 1); + nir_store_var(&b_shader, vars->arg, nir_iadd_imm(&b_shader, intr->src[1].ssa, -size - 16), 1); vars->stack_size = MAX2(vars->stack_size, size + 16); break; } case nir_intrinsic_rt_trace_ray: { uint32_t size = align(nir_intrinsic_stack_size(intr), 16); - nir_ssa_def *ret_ptr = - nir_load_resume_shader_address_amd(&b_shader, nir_intrinsic_call_idx(intr)); + nir_ssa_def *ret_ptr = nir_load_resume_shader_address_amd(&b_shader, nir_intrinsic_call_idx(intr)); ret_ptr = nir_ior_imm(&b_shader, ret_ptr, radv_get_rt_priority(shader->info.stage)); - nir_store_var( - &b_shader, vars->stack_ptr, - nir_iadd_imm_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), size), 1); - nir_store_scratch(&b_shader, ret_ptr, nir_load_var(&b_shader, vars->stack_ptr), - .align_mul = 16); + nir_store_var(&b_shader, vars->stack_ptr, + nir_iadd_imm_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), size), 1); + nir_store_scratch(&b_shader, ret_ptr, nir_load_var(&b_shader, vars->stack_ptr), .align_mul = 16); - nir_store_var( - &b_shader, vars->stack_ptr, - nir_iadd_imm_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), 16), 1); + nir_store_var(&b_shader, vars->stack_ptr, + nir_iadd_imm_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), 16), 1); - nir_store_var(&b_shader, vars->shader_va, - nir_load_var(&b_shader, vars->traversal_addr), 1); - nir_store_var(&b_shader, vars->arg, - nir_iadd_imm(&b_shader, intr->src[10].ssa, -size - 16), 1); + 
nir_store_var(&b_shader, vars->shader_va, nir_load_var(&b_shader, vars->traversal_addr), 1); + nir_store_var(&b_shader, vars->arg, nir_iadd_imm(&b_shader, intr->src[10].ssa, -size - 16), 1); vars->stack_size = MAX2(vars->stack_size, size + 16); /* Per the SPIR-V extension spec we have to ignore some bits for some arguments. */ nir_store_var(&b_shader, vars->accel_struct, intr->src[0].ssa, 0x1); nir_store_var(&b_shader, vars->cull_mask_and_flags, - nir_ior(&b_shader, nir_ishl_imm(&b_shader, intr->src[2].ssa, 24), - intr->src[1].ssa), - 0x1); - nir_store_var(&b_shader, vars->sbt_offset, - nir_iand_imm(&b_shader, intr->src[3].ssa, 0xf), 0x1); - nir_store_var(&b_shader, vars->sbt_stride, - nir_iand_imm(&b_shader, intr->src[4].ssa, 0xf), 0x1); - nir_store_var(&b_shader, vars->miss_index, - nir_iand_imm(&b_shader, intr->src[5].ssa, 0xffff), 0x1); + nir_ior(&b_shader, nir_ishl_imm(&b_shader, intr->src[2].ssa, 24), intr->src[1].ssa), 0x1); + nir_store_var(&b_shader, vars->sbt_offset, nir_iand_imm(&b_shader, intr->src[3].ssa, 0xf), 0x1); + nir_store_var(&b_shader, vars->sbt_stride, nir_iand_imm(&b_shader, intr->src[4].ssa, 0xf), 0x1); + nir_store_var(&b_shader, vars->miss_index, nir_iand_imm(&b_shader, intr->src[5].ssa, 0xffff), 0x1); nir_store_var(&b_shader, vars->origin, intr->src[6].ssa, 0x7); nir_store_var(&b_shader, vars->tmin, intr->src[7].ssa, 0x1); nir_store_var(&b_shader, vars->direction, intr->src[8].ssa, 0x7); @@ -374,9 +339,8 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca case nir_intrinsic_rt_resume: { uint32_t size = align(nir_intrinsic_stack_size(intr), 16); - nir_store_var( - &b_shader, vars->stack_ptr, - nir_iadd_imm(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), -size), 1); + nir_store_var(&b_shader, vars->stack_ptr, + nir_iadd_imm(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), -size), 1); break; } case nir_intrinsic_rt_return_amd: { @@ -390,15 +354,13 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca case nir_intrinsic_load_scratch: { nir_instr_rewrite_src_ssa( instr, &intr->src[0], - nir_iadd_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), - intr->src[0].ssa)); + nir_iadd_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), intr->src[0].ssa)); continue; } case nir_intrinsic_store_scratch: { nir_instr_rewrite_src_ssa( instr, &intr->src[1], - nir_iadd_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), - intr->src[1].ssa)); + nir_iadd_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), intr->src[1].ssa)); continue; } case nir_intrinsic_load_rt_arg_scratch_offset_amd: { @@ -447,13 +409,11 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr); ret = nir_build_load_global( &b_shader, 1, 32, - nir_iadd_imm(&b_shader, instance_node_addr, - offsetof(struct radv_bvh_instance_node, instance_id))); + nir_iadd_imm(&b_shader, instance_node_addr, offsetof(struct radv_bvh_instance_node, instance_id))); break; } case nir_intrinsic_load_ray_flags: { - ret = nir_iand_imm(&b_shader, nir_load_var(&b_shader, vars->cull_mask_and_flags), - 0xFFFFFF); + ret = nir_iand_imm(&b_shader, nir_load_var(&b_shader, vars->cull_mask_and_flags), 0xFFFFFF); break; } case nir_intrinsic_load_ray_hit_kind: { @@ -478,29 +438,26 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr); nir_ssa_def 
*rows[3]; for (unsigned r = 0; r < 3; ++r) - rows[r] = nir_build_load_global( - &b_shader, 4, 32, - nir_iadd_imm(&b_shader, instance_node_addr, - offsetof(struct radv_bvh_instance_node, otw_matrix) + r * 16)); - ret = - nir_vec3(&b_shader, nir_channel(&b_shader, rows[0], c), - nir_channel(&b_shader, rows[1], c), nir_channel(&b_shader, rows[2], c)); + rows[r] = + nir_build_load_global(&b_shader, 4, 32, + nir_iadd_imm(&b_shader, instance_node_addr, + offsetof(struct radv_bvh_instance_node, otw_matrix) + r * 16)); + ret = nir_vec3(&b_shader, nir_channel(&b_shader, rows[0], c), nir_channel(&b_shader, rows[1], c), + nir_channel(&b_shader, rows[2], c)); break; } case nir_intrinsic_load_ray_object_origin: { nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr); nir_ssa_def *wto_matrix[3]; nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix); - ret = nir_build_vec3_mat_mult(&b_shader, nir_load_var(&b_shader, vars->origin), - wto_matrix, true); + ret = nir_build_vec3_mat_mult(&b_shader, nir_load_var(&b_shader, vars->origin), wto_matrix, true); break; } case nir_intrinsic_load_ray_object_direction: { nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr); nir_ssa_def *wto_matrix[3]; nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix); - ret = nir_build_vec3_mat_mult(&b_shader, nir_load_var(&b_shader, vars->direction), - wto_matrix, false); + ret = nir_build_vec3_mat_mult(&b_shader, nir_load_var(&b_shader, vars->direction), wto_matrix, false); break; } case nir_intrinsic_load_intersection_opaque_amd: { @@ -508,8 +465,7 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca break; } case nir_intrinsic_load_cull_mask: { - ret = - nir_ushr_imm(&b_shader, nir_load_var(&b_shader, vars->cull_mask_and_flags), 24); + ret = nir_ushr_imm(&b_shader, nir_load_var(&b_shader, vars->cull_mask_and_flags), 24); break; } case nir_intrinsic_ignore_ray_intersection: { @@ -534,10 +490,8 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca case nir_intrinsic_report_ray_intersection: { nir_push_if( &b_shader, - nir_iand( - &b_shader, - nir_fge(&b_shader, nir_load_var(&b_shader, vars->tmax), intr->src[0].ssa), - nir_fge(&b_shader, intr->src[0].ssa, nir_load_var(&b_shader, vars->tmin)))); + nir_iand(&b_shader, nir_fge(&b_shader, nir_load_var(&b_shader, vars->tmax), intr->src[0].ssa), + nir_fge(&b_shader, intr->src[0].ssa, nir_load_var(&b_shader, vars->tmin)))); { nir_store_var(&b_shader, vars->ahit_accept, nir_imm_true(&b_shader), 0x1); nir_store_var(&b_shader, vars->tmax, intr->src[0].ssa, 1); @@ -570,15 +524,12 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca nir_store_var(&b_shader, vars->hit_kind, intr->src[5].ssa, 0x1); load_sbt_entry(&b_shader, vars, intr->src[0].ssa, SBT_HIT, SBT_RECURSIVE_PTR); - nir_ssa_def *should_return = - nir_test_mask(&b_shader, nir_load_var(&b_shader, vars->cull_mask_and_flags), - SpvRayFlagsSkipClosestHitShaderKHRMask); + nir_ssa_def *should_return = nir_test_mask(&b_shader, nir_load_var(&b_shader, vars->cull_mask_and_flags), + SpvRayFlagsSkipClosestHitShaderKHRMask); - if (!(vars->flags & - VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR)) { - should_return = - nir_ior(&b_shader, should_return, - nir_ieq_imm(&b_shader, nir_load_var(&b_shader, vars->shader_va), 0)); + if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR)) { + should_return = nir_ior(&b_shader, 
should_return, + nir_ieq_imm(&b_shader, nir_load_var(&b_shader, vars->shader_va), 0)); } /* should_return is set if we had a hit but we won't be calling the closest hit @@ -600,8 +551,7 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR)) { /* In case of a NULL miss shader, do nothing and just return. */ - nir_push_if(&b_shader, - nir_ieq_imm(&b_shader, nir_load_var(&b_shader, vars->shader_va), 0)); + nir_push_if(&b_shader, nir_ieq_imm(&b_shader, nir_load_var(&b_shader, vars->shader_va), 0)); insert_rt_return(&b_shader, vars); nir_pop_if(&b_shader, NULL); } @@ -641,8 +591,7 @@ lower_hit_attrib_deref(nir_builder *b, nir_instr *instr, void *data) return false; nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - if (intrin->intrinsic != nir_intrinsic_load_deref && - intrin->intrinsic != nir_intrinsic_store_deref) + if (intrin->intrinsic != nir_intrinsic_load_deref && intrin->intrinsic != nir_intrinsic_store_deref) return false; nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); @@ -670,11 +619,11 @@ lower_hit_attrib_deref(nir_builder *b, nir_instr *instr, void *data) } else if (bit_size == 32) { components[comp] = nir_load_hit_attrib_amd(b, .base = base); } else if (bit_size == 16) { - components[comp] = nir_channel( - b, nir_unpack_32_2x16(b, nir_load_hit_attrib_amd(b, .base = base)), comp_offset / 2); + components[comp] = + nir_channel(b, nir_unpack_32_2x16(b, nir_load_hit_attrib_amd(b, .base = base)), comp_offset / 2); } else if (bit_size == 8) { - components[comp] = nir_channel( - b, nir_unpack_bits(b, nir_load_hit_attrib_amd(b, .base = base), 8), comp_offset); + components[comp] = + nir_channel(b, nir_unpack_bits(b, nir_load_hit_attrib_amd(b, .base = base), 8), comp_offset); } else { unreachable("Invalid bit_size"); } @@ -702,18 +651,14 @@ lower_hit_attrib_deref(nir_builder *b, nir_instr *instr, void *data) nir_ssa_def *prev = nir_unpack_32_2x16(b, nir_load_hit_attrib_amd(b, .base = base)); nir_ssa_def *components[2]; for (uint32_t word = 0; word < 2; word++) - components[word] = (word == comp_offset / 2) ? nir_channel(b, value, comp) - : nir_channel(b, prev, word); - nir_store_hit_attrib_amd(b, nir_pack_32_2x16(b, nir_vec(b, components, 2)), - .base = base); + components[word] = (word == comp_offset / 2) ? nir_channel(b, value, comp) : nir_channel(b, prev, word); + nir_store_hit_attrib_amd(b, nir_pack_32_2x16(b, nir_vec(b, components, 2)), .base = base); } else if (bit_size == 8) { nir_ssa_def *prev = nir_unpack_bits(b, nir_load_hit_attrib_amd(b, .base = base), 8); nir_ssa_def *components[4]; for (uint32_t byte = 0; byte < 4; byte++) - components[byte] = - (byte == comp_offset) ? nir_channel(b, value, comp) : nir_channel(b, prev, byte); - nir_store_hit_attrib_amd(b, nir_pack_32_4x8(b, nir_vec(b, components, 4)), - .base = base); + components[byte] = (byte == comp_offset) ? 
nir_channel(b, value, comp) : nir_channel(b, prev, byte); + nir_store_hit_attrib_amd(b, nir_pack_32_4x8(b, nir_vec(b, components, 4)), .base = base); } else { unreachable("Invalid bit_size"); } @@ -727,8 +672,8 @@ lower_hit_attrib_deref(nir_builder *b, nir_instr *instr, void *data) static bool lower_hit_attrib_derefs(nir_shader *shader) { - bool progress = nir_shader_instructions_pass( - shader, lower_hit_attrib_deref, nir_metadata_block_index | nir_metadata_dominance, NULL); + bool progress = nir_shader_instructions_pass(shader, lower_hit_attrib_deref, + nir_metadata_block_index | nir_metadata_dominance, NULL); if (progress) { nir_remove_dead_derefs(shader); nir_remove_dead_variables(shader, nir_var_ray_hit_attrib, NULL); @@ -764,10 +709,9 @@ lower_hit_attribs(nir_shader *shader, nir_variable **hit_attribs, uint32_t workg nir_ssa_def *offset; if (!hit_attribs) - offset = nir_imul_imm(&b, - nir_iadd_imm(&b, nir_load_local_invocation_index(&b), - nir_intrinsic_base(intrin) * workgroup_size), - sizeof(uint32_t)); + offset = nir_imul_imm( + &b, nir_iadd_imm(&b, nir_load_local_invocation_index(&b), nir_intrinsic_base(intrin) * workgroup_size), + sizeof(uint32_t)); if (intrin->intrinsic == nir_intrinsic_load_hit_attrib_amd) { nir_ssa_def *ret; @@ -787,8 +731,7 @@ lower_hit_attribs(nir_shader *shader, nir_variable **hit_attribs, uint32_t workg } if (!hit_attribs) - shader->info.shared_size = - MAX2(shader->info.shared_size, workgroup_size * RADV_MAX_HIT_ATTRIB_SIZE); + shader->info.shared_size = MAX2(shader->info.shared_size, workgroup_size * RADV_MAX_HIT_ATTRIB_SIZE); } static void @@ -814,8 +757,7 @@ inline_constants(nir_shader *dst, nir_shader *src) uint32_t old_constant_data_size = dst->constant_data_size; uint32_t base_offset = align(dst->constant_data_size, align_mul); dst->constant_data_size = base_offset + src->constant_data_size; - dst->constant_data = - rerzalloc_size(dst, dst->constant_data, old_constant_data_size, dst->constant_data_size); + dst->constant_data = rerzalloc_size(dst, dst->constant_data, old_constant_data_size, dst->constant_data_size); memcpy((char *)dst->constant_data + base_offset, src->constant_data, src->constant_data_size); if (!base_offset) @@ -834,12 +776,11 @@ inline_constants(nir_shader *dst, nir_shader *src) } static void -insert_rt_case(nir_builder *b, nir_shader *shader, struct rt_variables *vars, nir_ssa_def *idx, - uint32_t call_idx_base, uint32_t call_idx, unsigned stage_idx, - struct radv_ray_tracing_stage *stages) +insert_rt_case(nir_builder *b, nir_shader *shader, struct rt_variables *vars, nir_ssa_def *idx, uint32_t call_idx_base, + uint32_t call_idx, unsigned stage_idx, struct radv_ray_tracing_stage *stages) { - uint32_t workgroup_size = b->shader->info.workgroup_size[0] * b->shader->info.workgroup_size[1] * - b->shader->info.workgroup_size[2]; + uint32_t workgroup_size = + b->shader->info.workgroup_size[0] * b->shader->info.workgroup_size[1] * b->shader->info.workgroup_size[2]; struct hash_table *var_remap = _mesa_pointer_hash_table_create(NULL); @@ -902,8 +843,7 @@ radv_parse_rt_stage(struct radv_device *device, const VkPipelineShaderStageCreat NIR_PASS(_, shader, lower_rt_derefs); NIR_PASS(_, shader, lower_hit_attrib_derefs); - NIR_PASS(_, shader, nir_lower_explicit_io, nir_var_function_temp, - nir_address_format_32bit_offset); + NIR_PASS(_, shader, nir_lower_explicit_io, nir_var_function_temp, nir_address_format_32bit_offset); return shader; } @@ -954,8 +894,7 @@ lower_any_hit_for_intersection(nir_shader *any_hit) nir_ssa_def *hit_kind = 
nir_load_param(b, 2); nir_ssa_def *scratch_offset = nir_load_param(b, 3); - nir_deref_instr *commit = - nir_build_deref_cast(b, commit_ptr, nir_var_function_temp, glsl_bool_type(), 0); + nir_deref_instr *commit = nir_build_deref_cast(b, commit_ptr, nir_var_function_temp, glsl_bool_type(), 0); nir_foreach_block_safe (block, impl) { nir_foreach_instr_safe (instr, block) { @@ -1001,13 +940,11 @@ lower_any_hit_for_intersection(nir_shader *any_hit) */ case nir_intrinsic_load_scratch: b->cursor = nir_before_instr(instr); - nir_instr_rewrite_src_ssa(instr, &intrin->src[0], - nir_iadd_nuw(b, scratch_offset, intrin->src[0].ssa)); + nir_instr_rewrite_src_ssa(instr, &intrin->src[0], nir_iadd_nuw(b, scratch_offset, intrin->src[0].ssa)); break; case nir_intrinsic_store_scratch: b->cursor = nir_before_instr(instr); - nir_instr_rewrite_src_ssa(instr, &intrin->src[1], - nir_iadd_nuw(b, scratch_offset, intrin->src[1].ssa)); + nir_instr_rewrite_src_ssa(instr, &intrin->src[1], nir_iadd_nuw(b, scratch_offset, intrin->src[1].ssa)); break; case nir_intrinsic_load_rt_arg_scratch_offset_amd: b->cursor = nir_after_instr(instr); @@ -1165,27 +1102,19 @@ init_traversal_vars(nir_builder *b) ret.origin = nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_origin"); ret.dir = nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_dir"); - ret.inv_dir = - nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_inv_dir"); - ret.sbt_offset_and_flags = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), - "traversal_sbt_offset_and_flags"); - ret.instance_addr = - nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr"); + ret.inv_dir = nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_inv_dir"); + ret.sbt_offset_and_flags = + nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_sbt_offset_and_flags"); + ret.instance_addr = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr"); ret.hit = nir_variable_create(b->shader, nir_var_shader_temp, glsl_bool_type(), "traversal_hit"); - ret.bvh_base = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), - "traversal_bvh_base"); - ret.stack = - nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_stack_ptr"); - ret.top_stack = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), - "traversal_top_stack_ptr"); - ret.stack_low_watermark = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), - "traversal_stack_low_watermark"); - ret.current_node = - nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "current_node;"); - ret.previous_node = - nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "previous_node"); - ret.instance_top_node = - nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "instance_top_node"); + ret.bvh_base = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "traversal_bvh_base"); + ret.stack = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_stack_ptr"); + ret.top_stack = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_top_stack_ptr"); + ret.stack_low_watermark = + nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_stack_low_watermark"); + ret.current_node = nir_variable_create(b->shader, nir_var_shader_temp, 
glsl_uint_type(), "current_node;"); + ret.previous_node = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "previous_node"); + ret.instance_top_node = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "instance_top_node"); ret.instance_bottom_node = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "instance_bottom_node"); return ret; @@ -1227,19 +1156,17 @@ visit_any_hit_shaders(struct radv_device *device, nir_builder *b, struct travers /* Avoid emitting stages with the same shaders/handles multiple times. */ bool is_dup = false; for (unsigned j = 0; j < i; ++j) - if (data->pipeline->groups[j].handle.any_hit_index == - data->pipeline->groups[i].handle.any_hit_index) + if (data->pipeline->groups[j].handle.any_hit_index == data->pipeline->groups[i].handle.any_hit_index) is_dup = true; if (is_dup) continue; - nir_shader *nir_stage = - radv_pipeline_cache_handle_to_nir(device, data->pipeline->stages[shader_id].shader); + nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, data->pipeline->stages[shader_id].shader); assert(nir_stage); - insert_rt_case(b, nir_stage, vars, sbt_idx, 0, data->pipeline->groups[i].handle.any_hit_index, - shader_id, data->pipeline->stages); + insert_rt_case(b, nir_stage, vars, sbt_idx, 0, data->pipeline->groups[i].handle.any_hit_index, shader_id, + data->pipeline->stages); ralloc_free(nir_stage); } @@ -1249,20 +1176,18 @@ visit_any_hit_shaders(struct radv_device *device, nir_builder *b, struct travers static void handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *intersection, - const struct radv_ray_traversal_args *args, - const struct radv_ray_flags *ray_flags) + const struct radv_ray_traversal_args *args, const struct radv_ray_flags *ray_flags) { struct traversal_data *data = args->data; nir_ssa_def *geometry_id = nir_iand_imm(b, intersection->base.geometry_id_and_flags, 0xfffffff); - nir_ssa_def *sbt_idx = nir_iadd( - b, - nir_iadd(b, nir_load_var(b, data->vars->sbt_offset), - nir_iand_imm(b, nir_load_var(b, data->trav_vars->sbt_offset_and_flags), 0xffffff)), - nir_imul(b, nir_load_var(b, data->vars->sbt_stride), geometry_id)); + nir_ssa_def *sbt_idx = + nir_iadd(b, + nir_iadd(b, nir_load_var(b, data->vars->sbt_offset), + nir_iand_imm(b, nir_load_var(b, data->trav_vars->sbt_offset_and_flags), 0xffffff)), + nir_imul(b, nir_load_var(b, data->vars->sbt_stride), geometry_id)); - nir_ssa_def *hit_kind = - nir_bcsel(b, intersection->frontface, nir_imm_int(b, 0xFE), nir_imm_int(b, 0xFF)); + nir_ssa_def *hit_kind = nir_bcsel(b, intersection->frontface, nir_imm_int(b, 0xFE), nir_imm_int(b, 0xFF)); nir_ssa_def *prev_barycentrics = nir_load_var(b, data->barycentrics); nir_store_var(b, data->barycentrics, intersection->barycentrics, 0x3); @@ -1275,11 +1200,9 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int struct rt_variables inner_vars = create_inner_vars(b, data->vars); nir_store_var(b, inner_vars.primitive_id, intersection->base.primitive_id, 1); - nir_store_var(b, inner_vars.geometry_id_and_flags, intersection->base.geometry_id_and_flags, - 1); + nir_store_var(b, inner_vars.geometry_id_and_flags, intersection->base.geometry_id_and_flags, 1); nir_store_var(b, inner_vars.tmax, intersection->t, 0x1); - nir_store_var(b, inner_vars.instance_addr, nir_load_var(b, data->trav_vars->instance_addr), - 0x1); + nir_store_var(b, inner_vars.instance_addr, nir_load_var(b, data->trav_vars->instance_addr), 0x1); nir_store_var(b, inner_vars.hit_kind, 
hit_kind, 0x1); load_sbt_entry(b, &inner_vars, sbt_idx, SBT_HIT, SBT_ANY_HIT_IDX); @@ -1298,8 +1221,7 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int nir_store_var(b, data->vars->primitive_id, intersection->base.primitive_id, 1); nir_store_var(b, data->vars->geometry_id_and_flags, intersection->base.geometry_id_and_flags, 1); nir_store_var(b, data->vars->tmax, intersection->t, 0x1); - nir_store_var(b, data->vars->instance_addr, nir_load_var(b, data->trav_vars->instance_addr), - 0x1); + nir_store_var(b, data->vars->instance_addr, nir_load_var(b, data->trav_vars->instance_addr), 0x1); nir_store_var(b, data->vars->hit_kind, hit_kind, 0x1); nir_store_var(b, data->vars->idx, sbt_idx, 1); @@ -1320,11 +1242,11 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio struct traversal_data *data = args->data; nir_ssa_def *geometry_id = nir_iand_imm(b, intersection->geometry_id_and_flags, 0xfffffff); - nir_ssa_def *sbt_idx = nir_iadd( - b, - nir_iadd(b, nir_load_var(b, data->vars->sbt_offset), - nir_iand_imm(b, nir_load_var(b, data->trav_vars->sbt_offset_and_flags), 0xffffff)), - nir_imul(b, nir_load_var(b, data->vars->sbt_stride), geometry_id)); + nir_ssa_def *sbt_idx = + nir_iadd(b, + nir_iadd(b, nir_load_var(b, data->vars->sbt_offset), + nir_iand_imm(b, nir_load_var(b, data->trav_vars->sbt_offset_and_flags), 0xffffff)), + nir_imul(b, nir_load_var(b, data->vars->sbt_stride), geometry_id)); struct rt_variables inner_vars = create_inner_vars(b, data->vars); @@ -1365,21 +1287,19 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio /* Avoid emitting stages with the same shaders/handles multiple times. */ bool is_dup = false; for (unsigned j = 0; j < i; ++j) - if (data->pipeline->groups[j].handle.intersection_index == - data->pipeline->groups[i].handle.intersection_index) + if (data->pipeline->groups[j].handle.intersection_index == data->pipeline->groups[i].handle.intersection_index) is_dup = true; if (is_dup) continue; - nir_shader *nir_stage = - radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[shader_id].shader); + nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[shader_id].shader); assert(nir_stage); nir_shader *any_hit_stage = NULL; if (any_hit_shader_id != VK_SHADER_UNUSED_KHR) { - any_hit_stage = radv_pipeline_cache_handle_to_nir( - data->device, data->pipeline->stages[any_hit_shader_id].shader); + any_hit_stage = + radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[any_hit_shader_id].shader); assert(any_hit_stage); /* reserve stack size for any_hit before it is inlined */ @@ -1390,8 +1310,7 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio } insert_rt_case(b, nir_stage, &inner_vars, nir_load_var(b, inner_vars.idx), 0, - data->pipeline->groups[i].handle.intersection_index, shader_id, - data->pipeline->stages); + data->pipeline->groups[i].handle.intersection_index, shader_id, data->pipeline->stages); ralloc_free(nir_stage); } @@ -1403,14 +1322,12 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio nir_store_var(b, data->vars->primitive_id, intersection->primitive_id, 1); nir_store_var(b, data->vars->geometry_id_and_flags, intersection->geometry_id_and_flags, 1); nir_store_var(b, data->vars->tmax, nir_load_var(b, inner_vars.tmax), 0x1); - nir_store_var(b, data->vars->instance_addr, nir_load_var(b, data->trav_vars->instance_addr), - 0x1); + 
nir_store_var(b, data->vars->instance_addr, nir_load_var(b, data->trav_vars->instance_addr), 0x1); nir_store_var(b, data->vars->idx, sbt_idx, 1); nir_store_var(b, data->trav_vars->hit, nir_imm_true(b), 1); - nir_ssa_def *terminate_on_first_hit = - nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask); + nir_ssa_def *terminate_on_first_hit = nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask); nir_ssa_def *ray_terminated = nir_load_var(b, data->vars->ahit_terminate); nir_push_if(b, nir_ior(b, terminate_on_first_hit, ray_terminated)); { @@ -1422,8 +1339,7 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio } static void -store_stack_entry(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value, - const struct radv_ray_traversal_args *args) +store_stack_entry(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value, const struct radv_ray_traversal_args *args) { nir_store_shared(b, value, index, .base = 0, .align_mul = 4); } @@ -1436,8 +1352,7 @@ load_stack_entry(nir_builder *b, nir_ssa_def *index, const struct radv_ray_trave nir_shader * radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline, - const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, - const struct radv_pipeline_key *key) + const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, const struct radv_pipeline_key *key) { /* Create the traversal shader as an intersection shader to prevent validation failures due to * invalid variable modes.*/ @@ -1445,19 +1360,17 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_ b.shader->info.internal = false; b.shader->info.workgroup_size[0] = 8; b.shader->info.workgroup_size[1] = device->physical_device->rt_wave_size == 64 ? 
8 : 4; - b.shader->info.shared_size = - device->physical_device->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t); + b.shader->info.shared_size = device->physical_device->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t); struct rt_variables vars = create_rt_variables(b.shader, pCreateInfo->flags); /* Register storage for hit attributes */ nir_variable *hit_attribs[RADV_MAX_HIT_ATTRIB_SIZE / sizeof(uint32_t)]; for (uint32_t i = 0; i < ARRAY_SIZE(hit_attribs); i++) - hit_attribs[i] = nir_local_variable_create(nir_shader_get_entrypoint(b.shader), - glsl_uint_type(), "ahit_attrib"); + hit_attribs[i] = nir_local_variable_create(nir_shader_get_entrypoint(b.shader), glsl_uint_type(), "ahit_attrib"); - nir_variable *barycentrics = nir_variable_create( - b.shader, nir_var_ray_hit_attrib, glsl_vector_type(GLSL_TYPE_FLOAT, 2), "barycentrics"); + nir_variable *barycentrics = + nir_variable_create(b.shader, nir_var_ray_hit_attrib, glsl_vector_type(GLSL_TYPE_FLOAT, 2), "barycentrics"); barycentrics->data.driver_location = 0; /* initialize trace_ray arguments */ @@ -1478,8 +1391,7 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_ nir_store_var(&b, trav_vars.hit, nir_imm_false(&b), 1); nir_ssa_def *bvh_offset = nir_build_load_global( - &b, 1, 32, - nir_iadd_imm(&b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)), + &b, 1, 32, nir_iadd_imm(&b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)), .access = ACCESS_NON_WRITEABLE); nir_ssa_def *root_bvh_base = nir_iadd(&b, accel_struct, nir_u2u64(&b, bvh_offset)); root_bvh_base = build_addr_to_node(&b, root_bvh_base); @@ -1494,14 +1406,12 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_ nir_store_var(&b, trav_vars.sbt_offset_and_flags, nir_imm_int(&b, 0), 1); nir_store_var(&b, trav_vars.instance_addr, nir_imm_int64(&b, 0), 1); - nir_store_var(&b, trav_vars.stack, - nir_imul_imm(&b, nir_load_local_invocation_index(&b), sizeof(uint32_t)), 1); + nir_store_var(&b, trav_vars.stack, nir_imul_imm(&b, nir_load_local_invocation_index(&b), sizeof(uint32_t)), 1); nir_store_var(&b, trav_vars.stack_low_watermark, nir_load_var(&b, trav_vars.stack), 1); nir_store_var(&b, trav_vars.current_node, nir_imm_int(&b, RADV_BVH_ROOT_NODE), 0x1); nir_store_var(&b, trav_vars.previous_node, nir_imm_int(&b, RADV_BVH_INVALID_NODE), 0x1); nir_store_var(&b, trav_vars.instance_top_node, nir_imm_int(&b, RADV_BVH_INVALID_NODE), 0x1); - nir_store_var(&b, trav_vars.instance_bottom_node, nir_imm_int(&b, RADV_BVH_NO_INSTANCE_ROOT), - 0x1); + nir_store_var(&b, trav_vars.instance_bottom_node, nir_imm_int(&b, RADV_BVH_NO_INSTANCE_ROOT), 0x1); nir_store_var(&b, trav_vars.top_stack, nir_imm_int(&b, -1), 1); @@ -1544,9 +1454,8 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_ .stack_base = 0, .stack_store_cb = store_stack_entry, .stack_load_cb = load_stack_entry, - .aabb_cb = (pCreateInfo->flags & VK_PIPELINE_CREATE_RAY_TRACING_SKIP_AABBS_BIT_KHR) - ? NULL - : handle_candidate_aabb, + .aabb_cb = + (pCreateInfo->flags & VK_PIPELINE_CREATE_RAY_TRACING_SKIP_AABBS_BIT_KHR) ? NULL : handle_candidate_aabb, .triangle_cb = (pCreateInfo->flags & VK_PIPELINE_CREATE_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR) ? 
NULL : handle_candidate_triangle, @@ -1564,10 +1473,9 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_ { for (int i = 0; i < ARRAY_SIZE(hit_attribs); ++i) nir_store_hit_attrib_amd(&b, nir_load_var(&b, hit_attribs[i]), .base = i); - nir_execute_closest_hit_amd( - &b, nir_load_var(&b, vars.idx), nir_load_var(&b, vars.tmax), - nir_load_var(&b, vars.primitive_id), nir_load_var(&b, vars.instance_addr), - nir_load_var(&b, vars.geometry_id_and_flags), nir_load_var(&b, vars.hit_kind)); + nir_execute_closest_hit_amd(&b, nir_load_var(&b, vars.idx), nir_load_var(&b, vars.tmax), + nir_load_var(&b, vars.primitive_id), nir_load_var(&b, vars.instance_addr), + nir_load_var(&b, vars.geometry_id_and_flags), nir_load_var(&b, vars.hit_kind)); } nir_push_else(&b, NULL); { @@ -1609,12 +1517,9 @@ select_next_shader(nir_builder *b, nir_ssa_def *shader_va, unsigned wave_size) gl_shader_stage stage = b->shader->info.stage; nir_ssa_def *prio = nir_iand_imm(b, shader_va, radv_rt_priority_mask); nir_ssa_def *ballot = nir_ballot(b, 1, wave_size, nir_imm_bool(b, true)); - nir_ssa_def *ballot_traversal = - nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_traversal)); - nir_ssa_def *ballot_hit_miss = - nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_hit_miss)); - nir_ssa_def *ballot_callable = - nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_callable)); + nir_ssa_def *ballot_traversal = nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_traversal)); + nir_ssa_def *ballot_hit_miss = nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_hit_miss)); + nir_ssa_def *ballot_callable = nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_callable)); if (stage != MESA_SHADER_CALLABLE && stage != MESA_SHADER_INTERSECTION) ballot = nir_bcsel(b, nir_ine_imm(b, ballot_traversal, 0), ballot_traversal, ballot); @@ -1630,8 +1535,8 @@ select_next_shader(nir_builder *b, nir_ssa_def *shader_va, unsigned wave_size) void radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, - const struct radv_shader_args *args, const struct radv_shader_info *info, - uint32_t *stack_size, bool resume_shader) + const struct radv_shader_args *args, const struct radv_shader_info *info, uint32_t *stack_size, + bool resume_shader) { nir_builder b; nir_function_impl *impl = nir_shader_get_entrypoint(shader); @@ -1659,31 +1564,26 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH nir_ssa_def *shader_va = ac_nir_load_arg(&b, &args->ac, args->ac.rt.next_shader); shader_va = nir_pack_64_2x32(&b, shader_va); nir_store_var(&b, vars.shader_va, shader_va, 1); - nir_store_var(&b, vars.stack_ptr, - ac_nir_load_arg(&b, &args->ac, args->ac.rt.dynamic_callable_stack_base), 1); + nir_store_var(&b, vars.stack_ptr, ac_nir_load_arg(&b, &args->ac, args->ac.rt.dynamic_callable_stack_base), 1); nir_ssa_def *record_ptr = ac_nir_load_arg(&b, &args->ac, args->ac.rt.shader_record); nir_store_var(&b, vars.shader_record_ptr, nir_pack_64_2x32(&b, record_ptr), 1); nir_store_var(&b, vars.arg, ac_nir_load_arg(&b, &args->ac, args->ac.rt.payload_offset), 1); nir_ssa_def *accel_struct = ac_nir_load_arg(&b, &args->ac, args->ac.rt.accel_struct); nir_store_var(&b, vars.accel_struct, nir_pack_64_2x32(&b, accel_struct), 1); - nir_store_var(&b, vars.cull_mask_and_flags, - ac_nir_load_arg(&b, &args->ac, args->ac.rt.cull_mask_and_flags), 1); + nir_store_var(&b, vars.cull_mask_and_flags, 
ac_nir_load_arg(&b, &args->ac, args->ac.rt.cull_mask_and_flags), 1); nir_store_var(&b, vars.sbt_offset, ac_nir_load_arg(&b, &args->ac, args->ac.rt.sbt_offset), 1); nir_store_var(&b, vars.sbt_stride, ac_nir_load_arg(&b, &args->ac, args->ac.rt.sbt_stride), 1); nir_store_var(&b, vars.miss_index, ac_nir_load_arg(&b, &args->ac, args->ac.rt.miss_index), 1); nir_store_var(&b, vars.origin, ac_nir_load_arg(&b, &args->ac, args->ac.rt.ray_origin), 0x7); nir_store_var(&b, vars.tmin, ac_nir_load_arg(&b, &args->ac, args->ac.rt.ray_tmin), 1); - nir_store_var(&b, vars.direction, ac_nir_load_arg(&b, &args->ac, args->ac.rt.ray_direction), - 0x7); + nir_store_var(&b, vars.direction, ac_nir_load_arg(&b, &args->ac, args->ac.rt.ray_direction), 0x7); nir_store_var(&b, vars.tmax, ac_nir_load_arg(&b, &args->ac, args->ac.rt.ray_tmax), 1); - nir_store_var(&b, vars.primitive_id, ac_nir_load_arg(&b, &args->ac, args->ac.rt.primitive_id), - 1); + nir_store_var(&b, vars.primitive_id, ac_nir_load_arg(&b, &args->ac, args->ac.rt.primitive_id), 1); nir_ssa_def *instance_addr = ac_nir_load_arg(&b, &args->ac, args->ac.rt.instance_addr); nir_store_var(&b, vars.instance_addr, nir_pack_64_2x32(&b, instance_addr), 1); - nir_store_var(&b, vars.geometry_id_and_flags, - ac_nir_load_arg(&b, &args->ac, args->ac.rt.geometry_id_and_flags), 1); + nir_store_var(&b, vars.geometry_id_and_flags, ac_nir_load_arg(&b, &args->ac, args->ac.rt.geometry_id_and_flags), 1); nir_store_var(&b, vars.hit_kind, ac_nir_load_arg(&b, &args->ac, args->ac.rt.hit_kind), 1); /* guard the shader, so that only the correct invocations execute it */ @@ -1709,15 +1609,12 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_pc, next); /* store back all variables to registers */ - ac_nir_store_arg(&b, &args->ac, args->ac.rt.dynamic_callable_stack_base, - nir_load_var(&b, vars.stack_ptr)); + ac_nir_store_arg(&b, &args->ac, args->ac.rt.dynamic_callable_stack_base, nir_load_var(&b, vars.stack_ptr)); ac_nir_store_arg(&b, &args->ac, args->ac.rt.next_shader, nir_load_var(&b, vars.shader_va)); - ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_record, - nir_load_var(&b, vars.shader_record_ptr)); + ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_record, nir_load_var(&b, vars.shader_record_ptr)); ac_nir_store_arg(&b, &args->ac, args->ac.rt.payload_offset, nir_load_var(&b, vars.arg)); ac_nir_store_arg(&b, &args->ac, args->ac.rt.accel_struct, nir_load_var(&b, vars.accel_struct)); - ac_nir_store_arg(&b, &args->ac, args->ac.rt.cull_mask_and_flags, - nir_load_var(&b, vars.cull_mask_and_flags)); + ac_nir_store_arg(&b, &args->ac, args->ac.rt.cull_mask_and_flags, nir_load_var(&b, vars.cull_mask_and_flags)); ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_offset, nir_load_var(&b, vars.sbt_offset)); ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_stride, nir_load_var(&b, vars.sbt_stride)); ac_nir_store_arg(&b, &args->ac, args->ac.rt.miss_index, nir_load_var(&b, vars.miss_index)); @@ -1728,8 +1625,7 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH ac_nir_store_arg(&b, &args->ac, args->ac.rt.primitive_id, nir_load_var(&b, vars.primitive_id)); ac_nir_store_arg(&b, &args->ac, args->ac.rt.instance_addr, nir_load_var(&b, vars.instance_addr)); - ac_nir_store_arg(&b, &args->ac, args->ac.rt.geometry_id_and_flags, - nir_load_var(&b, vars.geometry_id_and_flags)); + ac_nir_store_arg(&b, &args->ac, args->ac.rt.geometry_id_and_flags, nir_load_var(&b, vars.geometry_id_and_flags)); 
ac_nir_store_arg(&b, &args->ac, args->ac.rt.hit_kind, nir_load_var(&b, vars.hit_kind)); nir_metadata_preserve(impl, nir_metadata_none); @@ -1737,7 +1633,6 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH /* cleanup passes */ NIR_PASS_V(shader, nir_lower_global_vars_to_local); NIR_PASS_V(shader, nir_lower_vars_to_ssa); - if (shader->info.stage == MESA_SHADER_CLOSEST_HIT || - shader->info.stage == MESA_SHADER_INTERSECTION) + if (shader->info.stage == MESA_SHADER_CLOSEST_HIT || shader->info.stage == MESA_SHADER_INTERSECTION) NIR_PASS_V(shader, lower_hit_attribs, NULL, info->wave_size); } diff --git a/src/amd/vulkan/radv_sampler.c b/src/amd/vulkan/radv_sampler.c index 79ab83d..88a767a 100644 --- a/src/amd/vulkan/radv_sampler.c +++ b/src/amd/vulkan/radv_sampler.c @@ -80,11 +80,9 @@ radv_tex_filter(VkFilter filter, unsigned max_ansio) { switch (filter) { case VK_FILTER_NEAREST: - return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT - : V_008F38_SQ_TEX_XY_FILTER_POINT); + return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT : V_008F38_SQ_TEX_XY_FILTER_POINT); case VK_FILTER_LINEAR: - return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR - : V_008F38_SQ_TEX_XY_FILTER_BILINEAR); + return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR : V_008F38_SQ_TEX_XY_FILTER_BILINEAR); case VK_FILTER_CUBIC_EXT: default: fprintf(stderr, "illegal texture filter"); @@ -171,8 +169,7 @@ radv_register_border_color(struct radv_device *device, VkClearColorValue value) for (slot = 0; slot < RADV_BORDER_COLOR_COUNT; slot++) { if (!device->border_color_data.used[slot]) { /* Copy to the GPU wrt endian-ness. */ - util_memcpy_cpu_to_le32(&device->border_color_data.colors_gpu_ptr[slot], &value, - sizeof(VkClearColorValue)); + util_memcpy_cpu_to_le32(&device->border_color_data.colors_gpu_ptr[slot], &value, sizeof(VkClearColorValue)); device->border_color_data.used[slot] = true; break; @@ -195,23 +192,20 @@ radv_unregister_border_color(struct radv_device *device, uint32_t slot) } static void -radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler, - const VkSamplerCreateInfo *pCreateInfo) +radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler, const VkSamplerCreateInfo *pCreateInfo) { uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo); uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso); - bool compat_mode = device->physical_device->rad_info.gfx_level == GFX8 || - device->physical_device->rad_info.gfx_level == GFX9; + bool compat_mode = + device->physical_device->rad_info.gfx_level == GFX8 || device->physical_device->rad_info.gfx_level == GFX9; unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND; unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; - bool trunc_coord = - (pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST) || - device->physical_device->rad_info.conformant_trunc_coord; + bool trunc_coord = (pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST) || + device->physical_device->rad_info.conformant_trunc_coord; bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER || pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER || pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - VkBorderColor border_color = - uses_border_color ? 
pCreateInfo->borderColor : VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + VkBorderColor border_color = uses_border_color ? pCreateInfo->borderColor : VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; uint32_t border_color_ptr; bool disable_cube_wrap = pCreateInfo->flags & VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT; @@ -225,15 +219,13 @@ radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler, sampler->border_color_slot = RADV_BORDER_COLOR_COUNT; - if (border_color == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT || - border_color == VK_BORDER_COLOR_INT_CUSTOM_EXT) { + if (border_color == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT || border_color == VK_BORDER_COLOR_INT_CUSTOM_EXT) { const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color = vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT); assert(custom_border_color); - sampler->border_color_slot = - radv_register_border_color(device, custom_border_color->customBorderColor); + sampler->border_color_slot = radv_register_border_color(device, custom_border_color->customBorderColor); /* Did we fail to find a slot? */ if (sampler->border_color_slot == RADV_BORDER_COLOR_COUNT) { @@ -243,18 +235,16 @@ radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler, } /* If we don't have a custom color, set the ptr to 0 */ - border_color_ptr = - sampler->border_color_slot != RADV_BORDER_COLOR_COUNT ? sampler->border_color_slot : 0; - - sampler->state[0] = - (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) | - S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) | - S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) | - S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | S_008F30_DEPTH_COMPARE_FUNC(depth_compare_func) | - S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) | - S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) | - S_008F30_DISABLE_CUBE_WRAP(disable_cube_wrap) | S_008F30_COMPAT_MODE(compat_mode) | - S_008F30_FILTER_MODE(filter_mode) | S_008F30_TRUNC_COORD(trunc_coord)); + border_color_ptr = sampler->border_color_slot != RADV_BORDER_COLOR_COUNT ? sampler->border_color_slot : 0; + + sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) | + S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) | + S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) | + S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | S_008F30_DEPTH_COMPARE_FUNC(depth_compare_func) | + S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) | + S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) | + S_008F30_DISABLE_CUBE_WRAP(disable_cube_wrap) | S_008F30_COMPAT_MODE(compat_mode) | + S_008F30_FILTER_MODE(filter_mode) | S_008F30_TRUNC_COORD(trunc_coord)); sampler->state[1] = (S_008F34_MIN_LOD(radv_float_to_ufixed(CLAMP(pCreateInfo->minLod, 0, 15), 8)) | S_008F34_MAX_LOD(radv_float_to_ufixed(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) | S_008F34_PERF_MIP(max_aniso_ratio ? 
max_aniso_ratio + 6 : 0)); @@ -264,16 +254,14 @@ radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler, sampler->state[3] = S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color)); if (device->physical_device->rad_info.gfx_level >= GFX10) { - sampler->state[2] |= - S_008F38_LOD_BIAS(radv_float_to_sfixed(CLAMP(pCreateInfo->mipLodBias, -32, 31), 8)) | - S_008F38_ANISO_OVERRIDE_GFX10(device->instance->disable_aniso_single_level); + sampler->state[2] |= S_008F38_LOD_BIAS(radv_float_to_sfixed(CLAMP(pCreateInfo->mipLodBias, -32, 31), 8)) | + S_008F38_ANISO_OVERRIDE_GFX10(device->instance->disable_aniso_single_level); } else { - sampler->state[2] |= - S_008F38_LOD_BIAS(radv_float_to_sfixed(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) | - S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.gfx_level <= GFX8) | - S_008F38_FILTER_PREC_FIX(1) | - S_008F38_ANISO_OVERRIDE_GFX8(device->instance->disable_aniso_single_level && - device->physical_device->rad_info.gfx_level >= GFX8); + sampler->state[2] |= S_008F38_LOD_BIAS(radv_float_to_sfixed(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) | + S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.gfx_level <= GFX8) | + S_008F38_FILTER_PREC_FIX(1) | + S_008F38_ANISO_OVERRIDE_GFX8(device->instance->disable_aniso_single_level && + device->physical_device->rad_info.gfx_level >= GFX8); } if (device->physical_device->rad_info.gfx_level >= GFX11) { @@ -284,8 +272,8 @@ radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler, } VKAPI_ATTR VkResult VKAPI_CALL -radv_CreateSampler(VkDevice _device, const VkSamplerCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, VkSampler *pSampler) +radv_CreateSampler(VkDevice _device, const VkSamplerCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, + VkSampler *pSampler) { RADV_FROM_HANDLE(radv_device, device, _device); struct radv_sampler *sampler; @@ -295,8 +283,7 @@ radv_CreateSampler(VkDevice _device, const VkSamplerCreateInfo *pCreateInfo, assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); - sampler = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + sampler = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!sampler) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -304,8 +291,7 @@ radv_CreateSampler(VkDevice _device, const VkSamplerCreateInfo *pCreateInfo, radv_init_sampler(device, sampler, pCreateInfo); - sampler->ycbcr_sampler = - ycbcr_conversion ? vk_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL; + sampler->ycbcr_sampler = ycbcr_conversion ? 
vk_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL; *pSampler = radv_sampler_to_handle(sampler); return VK_SUCCESS; diff --git a/src/amd/vulkan/radv_sdma_copy_image.c b/src/amd/vulkan/radv_sdma_copy_image.c index 97423eb..d81dc12 100644 --- a/src/amd/vulkan/radv_sdma_copy_image.c +++ b/src/amd/vulkan/radv_sdma_copy_image.c @@ -27,9 +27,8 @@ #include "radv_private.h" static bool -radv_sdma_v4_v5_copy_image_to_buffer(struct radv_device *device, struct radeon_cmdbuf *cs, - struct radv_image *image, struct radv_buffer *buffer, - const VkBufferImageCopy2 *region) +radv_sdma_v4_v5_copy_image_to_buffer(struct radv_device *device, struct radeon_cmdbuf *cs, struct radv_image *image, + struct radv_buffer *buffer, const VkBufferImageCopy2 *region) { assert(image->plane_count == 1); unsigned bpp = image->planes[0].surface.bpe; @@ -50,8 +49,7 @@ radv_sdma_v4_v5_copy_image_to_buffer(struct radv_device *device, struct radeon_c src_address += image->planes[0].surface.u.gfx9.offset[0]; - radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, - CIK_SDMA_COPY_SUB_OPCODE_LINEAR, (tmz ? 4 : 0))); + radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_LINEAR, (tmz ? 4 : 0))); radeon_emit(cs, bytes - 1); radeon_emit(cs, 0); radeon_emit(cs, src_address); @@ -82,23 +80,16 @@ radv_sdma_v4_v5_copy_image_to_buffer(struct radv_device *device, struct radeon_c ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, 14 + (dcc ? 3 : 0)); - radeon_emit(cs, - CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW, - (tmz ? 4 : 0)) | - dcc << 19 | (is_v5 ? 0 : 0 /* tiled->buffer.b.b.last_level */) << 20 | - 1u << 31); - radeon_emit(cs, - (uint32_t)tiled_address | (image->planes[0].surface.tile_swizzle << 8)); + radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW, (tmz ? 4 : 0)) | + dcc << 19 | (is_v5 ? 0 : 0 /* tiled->buffer.b.b.last_level */) << 20 | 1u << 31); + radeon_emit(cs, (uint32_t)tiled_address | (image->planes[0].surface.tile_swizzle << 8)); radeon_emit(cs, (uint32_t)(tiled_address >> 32)); radeon_emit(cs, 0); radeon_emit(cs, ((tiled_width - 1) << 16)); radeon_emit(cs, (tiled_height - 1)); - radeon_emit( - cs, - util_logbase2(bpp) | image->planes[0].surface.u.gfx9.swizzle_mode << 3 | - image->planes[0].surface.u.gfx9.resource_type << 9 | - (is_v5 ? 0 /* tiled->buffer.b.b.last_level */ : image->planes[0].surface.u.gfx9.epitch) - << 16); + radeon_emit(cs, util_logbase2(bpp) | image->planes[0].surface.u.gfx9.swizzle_mode << 3 | + image->planes[0].surface.u.gfx9.resource_type << 9 | + (is_v5 ? 
0 /* tiled->buffer.b.b.last_level */ : image->planes[0].surface.u.gfx9.epitch) << 16); radeon_emit(cs, (uint32_t)linear_address); radeon_emit(cs, (uint32_t)(linear_address >> 32)); radeon_emit(cs, 0); @@ -114,18 +105,16 @@ radv_sdma_v4_v5_copy_image_to_buffer(struct radv_device *device, struct radeon_c unsigned hw_fmt, hw_type; desc = vk_format_description(image->vk.format); - hw_fmt = ac_get_cb_format(device->physical_device->rad_info.gfx_level, - vk_format_to_pipe_format(format)); + hw_fmt = ac_get_cb_format(device->physical_device->rad_info.gfx_level, vk_format_to_pipe_format(format)); hw_type = radv_translate_buffer_numformat(desc, vk_format_get_first_non_void_channel(format)); /* Add metadata */ radeon_emit(cs, (uint32_t)md_address); radeon_emit(cs, (uint32_t)(md_address >> 32)); - radeon_emit(cs, - hw_fmt | vi_alpha_is_on_msb(device, format) << 8 | hw_type << 9 | - image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size << 24 | - V_028C78_MAX_BLOCK_SIZE_256B << 26 | tmz << 29 | - image->planes[0].surface.u.gfx9.color.dcc.pipe_aligned << 31); + radeon_emit(cs, hw_fmt | vi_alpha_is_on_msb(device, format) << 8 | hw_type << 9 | + image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size << 24 | + V_028C78_MAX_BLOCK_SIZE_256B << 26 | tmz << 29 | + image->planes[0].surface.u.gfx9.color.dcc.pipe_aligned << 31); } assert(cs->cdw <= cdw_max); @@ -145,15 +134,14 @@ radv_sdma_copy_image(struct radv_device *device, struct radeon_cmdbuf *cs, struc } void -radv_sdma_copy_buffer(struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t src_va, - uint64_t dst_va, uint64_t size) +radv_sdma_copy_buffer(struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t src_va, uint64_t dst_va, + uint64_t size) { if (size == 0) return; enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level; - unsigned max_size_per_packet = - gfx_level >= GFX10_3 ? GFX103_SDMA_COPY_MAX_SIZE : CIK_SDMA_COPY_MAX_SIZE; + unsigned max_size_per_packet = gfx_level >= GFX10_3 ? GFX103_SDMA_COPY_MAX_SIZE : CIK_SDMA_COPY_MAX_SIZE; unsigned align = ~0u; unsigned ncopy = DIV_ROUND_UP(size, max_size_per_packet); bool tmz = false; @@ -176,8 +164,7 @@ radv_sdma_copy_buffer(struct radv_device *device, struct radeon_cmdbuf *cs, uint for (unsigned i = 0; i < ncopy; i++) { unsigned csize = size >= 4 ? MIN2(size & align, max_size_per_packet) : size; - radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_LINEAR, - (tmz ? 1u : 0) << 2)); + radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_LINEAR, (tmz ? 1u : 0) << 2)); radeon_emit(cs, gfx_level >= GFX9 ? 
csize - 1 : csize); radeon_emit(cs, 0); /* src/dst endian swap */ radeon_emit(cs, src_va); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index cfe1365..11d1dcf 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -50,8 +50,8 @@ #include "aco_interface.h" #include "sid.h" #include "vk_format.h" -#include "vk_sync.h" #include "vk_semaphore.h" +#include "vk_sync.h" #include "aco_shader_info.h" #include "radv_aco_shader_info.h" @@ -100,11 +100,9 @@ get_nir_options_for_stage(struct radv_physical_device *device, gl_shader_stage s .has_fsub = true, .has_isub = true, .has_sdot_4x8 = device->rad_info.has_accelerated_dot_product, - .has_sudot_4x8 = - device->rad_info.has_accelerated_dot_product && device->rad_info.gfx_level >= GFX11, + .has_sudot_4x8 = device->rad_info.has_accelerated_dot_product && device->rad_info.gfx_level >= GFX11, .has_udot_4x8 = device->rad_info.has_accelerated_dot_product, - .has_dot_2x16 = - device->rad_info.has_accelerated_dot_product && device->rad_info.gfx_level < GFX11, + .has_dot_2x16 = device->rad_info.has_accelerated_dot_product && device->rad_info.gfx_level < GFX11, .has_find_msb_rev = true, .has_pack_half_2x16_rtz = true, .has_fmulz = true, @@ -114,9 +112,8 @@ get_nir_options_for_stage(struct radv_physical_device *device, gl_shader_stage s .vectorize_vec2_16bit = true, /* nir_lower_int64() isn't actually called for the LLVM backend, * but this helps the loop unrolling heuristics. */ - .lower_int64_options = nir_lower_imul64 | nir_lower_imul_high64 | nir_lower_imul_2x32_64 | - nir_lower_divmod64 | nir_lower_minmax64 | nir_lower_iabs64 | - nir_lower_iadd_sat64, + .lower_int64_options = nir_lower_imul64 | nir_lower_imul_high64 | nir_lower_imul_2x32_64 | nir_lower_divmod64 | + nir_lower_minmax64 | nir_lower_iabs64 | nir_lower_iadd_sat64, .lower_doubles_options = nir_lower_drcp | nir_lower_dsqrt | nir_lower_drsq | nir_lower_ddiv, .divergence_analysis_options = nir_divergence_view_index_uniform, }; @@ -155,8 +152,7 @@ radv_can_dump_shader(struct radv_device *device, nir_shader *nir, bool meta_shad if (!(device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS)) return false; - if ((is_meta_shader(nir) || meta_shader) && - !(device->instance->debug_flags & RADV_DEBUG_DUMP_META_SHADERS)) + if ((is_meta_shader(nir) || meta_shader) && !(device->instance->debug_flags & RADV_DEBUG_DUMP_META_SHADERS)) return false; return true; @@ -204,8 +200,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively) NIR_PASS(progress, shader, nir_opt_remove_phis); NIR_PASS(progress, shader, nir_opt_dce); } - NIR_PASS(progress, shader, nir_opt_if, - nir_opt_if_aggressive_last_continue | nir_opt_if_optimize_phi_true_false); + NIR_PASS(progress, shader, nir_opt_if, nir_opt_if_aggressive_last_continue | nir_opt_if_optimize_phi_true_false); NIR_PASS(progress, shader, nir_opt_dead_cf); NIR_PASS(progress, shader, nir_opt_cse); NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, true); @@ -220,11 +215,10 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively) } while (progress && !optimize_conservatively); NIR_PASS(progress, shader, nir_opt_shrink_vectors); - NIR_PASS(progress, shader, nir_remove_dead_variables, - nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, NULL); + NIR_PASS(progress, shader, nir_remove_dead_variables, nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, + NULL); - if (shader->info.stage == MESA_SHADER_FRAGMENT && - (shader->info.fs.uses_discard || 
shader->info.fs.uses_demote)) { + if (shader->info.stage == MESA_SHADER_FRAGMENT && (shader->info.fs.uses_discard || shader->info.fs.uses_demote)) { NIR_PASS(progress, shader, nir_opt_conditional_discard); NIR_PASS(progress, shader, nir_opt_move_discards_to_top); } @@ -287,8 +281,7 @@ struct radv_shader_debug_data { }; static void -radv_spirv_nir_debug(void *private_data, enum nir_spirv_debug_level level, size_t spirv_offset, - const char *message) +radv_spirv_nir_debug(void *private_data, enum nir_spirv_debug_level level, size_t spirv_offset, const char *message) { struct radv_shader_debug_data *debug_data = private_data; struct radv_instance *instance = debug_data->device->instance; @@ -319,8 +312,7 @@ radv_compiler_debug(void *private_data, enum aco_compiler_debug_level level, con /* VK_DEBUG_REPORT_DEBUG_BIT_EXT specifies diagnostic information * from the implementation and layers. */ - vk_debug_report(&instance->vk, vk_flags[level] | VK_DEBUG_REPORT_DEBUG_BIT_EXT, - NULL, 0, 0, "radv", message); + vk_debug_report(&instance->vk, vk_flags[level] | VK_DEBUG_REPORT_DEBUG_BIT_EXT, NULL, 0, 0, "radv", message); } static bool @@ -329,8 +321,7 @@ is_not_xfb_output(nir_variable *var, void *data) if (var->data.mode != nir_var_shader_out) return true; - return !var->data.explicit_xfb_buffer && - !var->data.explicit_xfb_stride; + return !var->data.explicit_xfb_buffer && !var->data.explicit_xfb_stride; } nir_shader * @@ -363,13 +354,11 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_ assert(stage->spirv.size % 4 == 0); bool dump_meta = device->instance->debug_flags & RADV_DEBUG_DUMP_META_SHADERS; - if ((device->instance->debug_flags & RADV_DEBUG_DUMP_SPIRV) && - (!is_internal || dump_meta)) + if ((device->instance->debug_flags & RADV_DEBUG_DUMP_SPIRV) && (!is_internal || dump_meta)) radv_print_spirv(stage->spirv.data, stage->spirv.size, stderr); uint32_t num_spec_entries = 0; - struct nir_spirv_specialization *spec_entries = - vk_spec_info_to_nir_spirv(stage->spec_info, &num_spec_entries); + struct nir_spirv_specialization *spec_entries = vk_spec_info_to_nir_spirv(stage->spec_info, &num_spec_entries); struct radv_shader_debug_data spirv_debug_data = { .device = device, .object = stage->spirv.object, @@ -455,9 +444,8 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_ }, .force_tex_non_uniform = key->tex_non_uniform, }; - nir = spirv_to_nir(spirv, stage->spirv.size / 4, spec_entries, num_spec_entries, stage->stage, - stage->entrypoint, &spirv_options, - &device->physical_device->nir_options[stage->stage]); + nir = spirv_to_nir(spirv, stage->spirv.size / 4, spec_entries, num_spec_entries, stage->stage, stage->entrypoint, + &spirv_options, &device->physical_device->nir_options[stage->stage]); nir->info.internal |= is_internal; assert(nir->info.stage == stage->stage); nir_validate_shader(nir, "after spirv_to_nir"); @@ -484,8 +472,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_ NIR_PASS(_, nir, nir_opt_deref); /* Pick off the single entrypoint that we want */ - foreach_list_typed_safe(nir_function, func, node, &nir->functions) - { + foreach_list_typed_safe (nir_function, func, node, &nir->functions) { if (func->is_entrypoint) func->name = ralloc_strdup(func, "main"); else @@ -522,8 +509,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_ .can_remove_var = is_not_xfb_output, }; NIR_PASS(_, nir, nir_remove_dead_variables, - nir_var_shader_in | nir_var_shader_out | 
nir_var_system_value | nir_var_mem_shared, - &dead_vars_opts); + nir_var_shader_in | nir_var_shader_out | nir_var_system_value | nir_var_mem_shared, &dead_vars_opts); /* Variables can make nir_propagate_invariant more conservative * than it needs to be. @@ -535,8 +521,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_ NIR_PASS(_, nir, nir_lower_clip_cull_distance_arrays); - if (nir->info.stage == MESA_SHADER_VERTEX || - nir->info.stage == MESA_SHADER_TESS_EVAL || + if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL || nir->info.stage == MESA_SHADER_GEOMETRY) NIR_PASS_V(nir, nir_shader_gather_xfb_info); @@ -564,8 +549,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_ */ .lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH, .lower_local_invocation_index = nir->info.stage == MESA_SHADER_COMPUTE && - ((nir->info.workgroup_size[0] == 1) + - (nir->info.workgroup_size[1] == 1) + + ((nir->info.workgroup_size[0] == 1) + (nir->info.workgroup_size[1] == 1) + (nir->info.workgroup_size[2] == 1)) == 2, }; NIR_PASS(_, nir, nir_lower_compute_system_values, &csv_options); @@ -592,8 +576,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_ if (!nir->info.shared_memory_explicit_layout) NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, shared_var_info); - NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared, - nir_address_format_32bit_offset); + NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset); } NIR_PASS(_, nir, nir_opt_ray_queries); @@ -660,8 +643,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_ */ NIR_PASS(_, nir, nir_lower_var_copies); - unsigned lower_flrp = (nir->options->lower_flrp16 ? 16 : 0) | - (nir->options->lower_flrp32 ? 32 : 0) | + unsigned lower_flrp = (nir->options->lower_flrp16 ? 16 : 0) | (nir->options->lower_flrp32 ? 32 : 0) | (nir->options->lower_flrp64 ? 64 : 0); if (lower_flrp != 0) { bool progress = false; @@ -683,20 +665,17 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_ NIR_PASS(_, nir, radv_nir_lower_intrinsics_early, key); /* Lower deref operations for compute shared memory. 
*/ - if (nir->info.stage == MESA_SHADER_COMPUTE || - nir->info.stage == MESA_SHADER_TASK || + if (nir->info.stage == MESA_SHADER_COMPUTE || nir->info.stage == MESA_SHADER_TASK || nir->info.stage == MESA_SHADER_MESH) { nir_variable_mode var_modes = nir_var_mem_shared; - if (nir->info.stage == MESA_SHADER_TASK || - nir->info.stage == MESA_SHADER_MESH) + if (nir->info.stage == MESA_SHADER_TASK || nir->info.stage == MESA_SHADER_MESH) var_modes |= nir_var_mem_task_payload; if (!nir->info.shared_memory_explicit_layout) NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, var_modes, shared_var_info); else if (var_modes & ~nir_var_mem_shared) - NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, var_modes & ~nir_var_mem_shared, - shared_var_info); + NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, var_modes & ~nir_var_mem_shared, shared_var_info); NIR_PASS(_, nir, nir_lower_explicit_io, var_modes, nir_address_format_32bit_offset); if (nir->info.zero_initialize_shared_memory && nir->info.shared_size > 0) { @@ -706,8 +685,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_ } } - NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global | nir_var_mem_constant, - nir_address_format_64bit_global); + NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global | nir_var_mem_constant, nir_address_format_64bit_global); /* Lower large variables that are always constant with load_constant * intrinsics, which get turned into PC-relative loads from a data @@ -716,13 +694,11 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_ NIR_PASS(_, nir, nir_opt_large_constants, glsl_get_natural_size_align_bytes, 16); /* Lower primitive shading rate to match HW requirements. */ - if ((nir->info.stage == MESA_SHADER_VERTEX || - nir->info.stage == MESA_SHADER_GEOMETRY || + if ((nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_GEOMETRY || nir->info.stage == MESA_SHADER_MESH) && nir->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE)) { /* Lower primitive shading rate to match HW requirements. */ - NIR_PASS(_, nir, radv_nir_lower_primitive_shading_rate, - device->physical_device->rad_info.gfx_level); + NIR_PASS(_, nir, radv_nir_lower_primitive_shading_rate, device->physical_device->rad_info.gfx_level); } /* Indirect lowering must be called after the radv_optimize_nir() loop @@ -730,8 +706,8 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_ * bloat the instruction count of the loop and cause it to be * considered too large for unrolling. */ - if (ac_nir_lower_indirect_derefs(nir, device->physical_device->rad_info.gfx_level) && - !key->optimisations_disabled && nir->info.stage != MESA_SHADER_COMPUTE) { + if (ac_nir_lower_indirect_derefs(nir, device->physical_device->rad_info.gfx_level) && !key->optimisations_disabled && + nir->info.stage != MESA_SHADER_COMPUTE) { /* Optimize the lowered code before the linking optimizations. */ radv_optimize_nir(nir, false); } @@ -797,48 +773,34 @@ radv_consider_culling(const struct radv_physical_device *pdevice, struct nir_sha } static void -setup_ngg_lds_layout(struct radv_device *device, nir_shader *nir, struct radv_shader_info *info, - unsigned max_vtx_in) +setup_ngg_lds_layout(struct radv_device *device, nir_shader *nir, struct radv_shader_info *info, unsigned max_vtx_in) { unsigned scratch_lds_base = 0; gl_shader_stage stage = nir->info.stage; if (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL) { /* Get pervertex LDS usage. 
*/ - bool uses_instanceid = - BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID); - bool uses_primitive_id = - BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID); + bool uses_instanceid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID); + bool uses_primitive_id = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID); bool streamout_enabled = nir->xfb_info && device->physical_device->use_ngg_streamout; - unsigned pervertex_lds_bytes = - ac_ngg_nogs_get_pervertex_lds_size(stage, - nir->num_outputs, - streamout_enabled, - info->outinfo.export_prim_id, - false, /* user edge flag */ - info->has_ngg_culling, - uses_instanceid, - uses_primitive_id); + unsigned pervertex_lds_bytes = ac_ngg_nogs_get_pervertex_lds_size( + stage, nir->num_outputs, streamout_enabled, info->outinfo.export_prim_id, false, /* user edge flag */ + info->has_ngg_culling, uses_instanceid, uses_primitive_id); unsigned total_es_lds_bytes = pervertex_lds_bytes * max_vtx_in; scratch_lds_base = ALIGN(total_es_lds_bytes, 8u); } else if (stage == MESA_SHADER_GEOMETRY) { unsigned esgs_ring_lds_bytes = info->ngg_info.esgs_ring_size; unsigned gs_total_out_vtx_bytes = info->ngg_info.ngg_emit_size * 4u; - scratch_lds_base = - ALIGN(esgs_ring_lds_bytes + gs_total_out_vtx_bytes, 8u /* for the repacking code */); + scratch_lds_base = ALIGN(esgs_ring_lds_bytes + gs_total_out_vtx_bytes, 8u /* for the repacking code */); } else { /* not handled here */ return; } /* Get scratch LDS usage. */ - unsigned scratch_lds_size = - ac_ngg_get_scratch_lds_size(stage, - info->workgroup_size, - info->wave_size, - device->physical_device->use_ngg_streamout, - info->has_ngg_culling); + unsigned scratch_lds_size = ac_ngg_get_scratch_lds_size( + stage, info->workgroup_size, info->wave_size, device->physical_device->use_ngg_streamout, info->has_ngg_culling); /* Get total LDS usage. 
*/ nir->info.shared_size = scratch_lds_base + scratch_lds_size; @@ -847,16 +809,15 @@ setup_ngg_lds_layout(struct radv_device *device, nir_shader *nir, struct radv_sh info->ngg_info.scratch_lds_base = scratch_lds_base; } -void radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_stage, - const struct radv_pipeline_key *pl_key) +void +radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_stage, + const struct radv_pipeline_key *pl_key) { const struct radv_shader_info *info = &ngg_stage->info; nir_shader *nir = ngg_stage->nir; - assert(nir->info.stage == MESA_SHADER_VERTEX || - nir->info.stage == MESA_SHADER_TESS_EVAL || - nir->info.stage == MESA_SHADER_GEOMETRY || - nir->info.stage == MESA_SHADER_MESH); + assert(nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL || + nir->info.stage == MESA_SHADER_GEOMETRY || nir->info.stage == MESA_SHADER_MESH); const struct gfx10_ngg_info *ngg_info = &info->ngg_info; unsigned num_vertices_per_prim = 3; @@ -911,8 +872,7 @@ void radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_ options.has_xfb_prim_query = info->has_ngg_xfb_query; options.force_vrs = info->force_vrs_per_vertex; - if (nir->info.stage == MESA_SHADER_VERTEX || - nir->info.stage == MESA_SHADER_TESS_EVAL) { + if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) { assert(info->is_ngg); if (info->has_ngg_culling) @@ -936,13 +896,8 @@ void radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_ NIR_PASS_V(nir, ac_nir_lower_ngg_gs, &options); } else if (nir->info.stage == MESA_SHADER_MESH) { bool scratch_ring = false; - NIR_PASS_V(nir, ac_nir_lower_ngg_ms, - options.gfx_level, - options.clipdist_enable_mask, - options.vs_output_param_offset, - options.has_param_exports, - &scratch_ring, - info->wave_size, + NIR_PASS_V(nir, ac_nir_lower_ngg_ms, options.gfx_level, options.clipdist_enable_mask, + options.vs_output_param_offset, options.has_param_exports, &scratch_ring, info->wave_size, pl_key->has_multiview_view_index); ngg_stage->info.ms.needs_ms_scratch_ring = scratch_ring; } else { @@ -954,8 +909,7 @@ static unsigned get_size_class(unsigned size, bool round_up) { size = round_up ? util_logbase2_ceil(size) : util_logbase2(size); - unsigned size_class = - MAX2(size, RADV_SHADER_ALLOC_MIN_SIZE_CLASS) - RADV_SHADER_ALLOC_MIN_SIZE_CLASS; + unsigned size_class = MAX2(size, RADV_SHADER_ALLOC_MIN_SIZE_CLASS) - RADV_SHADER_ALLOC_MIN_SIZE_CLASS; return MIN2(size_class, RADV_SHADER_ALLOC_NUM_FREE_LISTS - 1); } @@ -1007,8 +961,7 @@ radv_shader_wait_for_upload(struct radv_device *device, uint64_t seq) .semaphoreCount = 1, .pValues = &seq, }; - return device->vk.dispatch_table.WaitSemaphores(radv_device_to_handle(device), &wait_info, - UINT64_MAX); + return device->vk.dispatch_table.WaitSemaphores(radv_device_to_handle(device), &wait_info, UINT64_MAX); } /* Segregated fit allocator, implementing a good-fit allocation policy. @@ -1034,14 +987,11 @@ radv_alloc_shader_memory(struct radv_device *device, uint32_t size, void *ptr) * at the first one available. 
*/ unsigned free_list_mask = BITFIELD_MASK(RADV_SHADER_ALLOC_NUM_FREE_LISTS); - unsigned size_class = - ffs(device->shader_free_list_mask & (free_list_mask << get_size_class(size, true))); + unsigned size_class = ffs(device->shader_free_list_mask & (free_list_mask << get_size_class(size, true))); if (size_class) { size_class--; - list_for_each_entry(union radv_shader_arena_block, hole, - &device->shader_free_lists[size_class], freelist) - { + list_for_each_entry (union radv_shader_arena_block, hole, &device->shader_free_lists[size_class], freelist) { if (hole->size < size) continue; @@ -1083,21 +1033,17 @@ radv_alloc_shader_memory(struct radv_device *device, uint32_t size, void *ptr) goto fail; unsigned arena_size = - MAX2(RADV_SHADER_ALLOC_MIN_ARENA_SIZE - << MIN2(RADV_SHADER_ALLOC_MAX_ARENA_SIZE_SHIFT, device->shader_arena_shift), + MAX2(RADV_SHADER_ALLOC_MIN_ARENA_SIZE << MIN2(RADV_SHADER_ALLOC_MAX_ARENA_SIZE_SHIFT, device->shader_arena_shift), size); enum radeon_bo_flag flags = RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_32BIT; if (device->shader_use_invisible_vram) flags |= RADEON_FLAG_NO_CPU_ACCESS; else - flags |= - (device->physical_device->rad_info.cpdma_prefetch_writes_memory ? 0 - : RADEON_FLAG_READ_ONLY); + flags |= (device->physical_device->rad_info.cpdma_prefetch_writes_memory ? 0 : RADEON_FLAG_READ_ONLY); VkResult result; - result = - device->ws->buffer_create(device->ws, arena_size, RADV_SHADER_ALLOC_ALIGNMENT, - RADEON_DOMAIN_VRAM, flags, RADV_BO_PRIORITY_SHADER, 0, &arena->bo); + result = device->ws->buffer_create(device->ws, arena_size, RADV_SHADER_ALLOC_ALIGNMENT, RADEON_DOMAIN_VRAM, flags, + RADV_BO_PRIORITY_SHADER, 0, &arena->bo); if (result != VK_SUCCESS) goto fail; radv_rmv_log_bo_allocate(device, arena->bo, arena_size, true); @@ -1222,11 +1168,10 @@ radv_init_shader_arenas(struct radv_device *device) void radv_destroy_shader_arenas(struct radv_device *device) { - list_for_each_entry_safe(union radv_shader_arena_block, block, &device->shader_block_obj_pool, - pool) free(block); + list_for_each_entry_safe (union radv_shader_arena_block, block, &device->shader_block_obj_pool, pool) + free(block); - list_for_each_entry_safe(struct radv_shader_arena, arena, &device->shader_arenas, list) - { + list_for_each_entry_safe (struct radv_shader_arena, arena, &device->shader_arenas, list) { radv_rmv_log_bo_destroy(device, arena->bo); device->ws->buffer_destroy(device->ws, arena->bo); free(arena); @@ -1292,9 +1237,7 @@ radv_destroy_shader_upload_queue(struct radv_device *device) if (device->shader_upload_sem) disp->DestroySemaphore(radv_device_to_handle(device), device->shader_upload_sem, NULL); - list_for_each_entry_safe(struct radv_shader_dma_submission, submission, - &device->shader_dma_submissions, list) - { + list_for_each_entry_safe (struct radv_shader_dma_submission, submission, &device->shader_dma_submissions, list) { if (submission->cs) ws->cs_destroy(submission->cs); if (submission->bo) @@ -1323,8 +1266,7 @@ radv_get_shader_binary_size(size_t code_size) } static bool -radv_should_use_wgp_mode(const struct radv_device *device, gl_shader_stage stage, - const struct radv_shader_info *info) +radv_should_use_wgp_mode(const struct radv_device *device, gl_shader_stage stage, const struct radv_shader_info *info) { enum amd_gfx_level chip = device->physical_device->rad_info.gfx_level; switch (stage) { @@ -1458,14 +1400,13 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi } if (!pdevice->use_ngg_streamout) { - config->rsrc2 |= - 
S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) | S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) | - S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) | S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) | - S_00B12C_SO_EN(!!info->so.num_outputs); + config->rsrc2 |= S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) | S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) | + S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) | S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) | + S_00B12C_SO_EN(!!info->so.num_outputs); } - config->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) / (info->wave_size == 32 ? 8 : 4)) | - S_00B848_DX10_CLAMP(1) | S_00B848_FLOAT_MODE(config->float_mode); + config->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) / (info->wave_size == 32 ? 8 : 4)) | S_00B848_DX10_CLAMP(1) | + S_00B848_FLOAT_MODE(config->float_mode); if (pdevice->rad_info.gfx_level >= GFX10) { config->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX10(args->num_user_sgprs >> 5); @@ -1515,8 +1456,7 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi } else { config->rsrc2 |= S_00B12C_OC_LDS_EN(1) | S_00B12C_EXCP_EN(excp_en); } - config->rsrc1 |= - S_00B428_MEM_ORDERED(pdevice->rad_info.gfx_level >= GFX10) | S_00B428_WGP_MODE(wgp_mode); + config->rsrc1 |= S_00B428_MEM_ORDERED(pdevice->rad_info.gfx_level >= GFX10) | S_00B428_WGP_MODE(wgp_mode); config->rsrc2 |= S_00B42C_SHARED_VGPR_CNT(num_shared_vgpr_blocks); break; case MESA_SHADER_VERTEX: @@ -1575,16 +1515,14 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi FALLTHROUGH; case MESA_SHADER_COMPUTE: case MESA_SHADER_TASK: - config->rsrc1 |= - S_00B848_MEM_ORDERED(pdevice->rad_info.gfx_level >= GFX10) | S_00B848_WGP_MODE(wgp_mode); - config->rsrc2 |= S_00B84C_TGID_X_EN(info->cs.uses_block_id[0]) | - S_00B84C_TGID_Y_EN(info->cs.uses_block_id[1]) | + config->rsrc1 |= S_00B848_MEM_ORDERED(pdevice->rad_info.gfx_level >= GFX10) | S_00B848_WGP_MODE(wgp_mode); + config->rsrc2 |= S_00B84C_TGID_X_EN(info->cs.uses_block_id[0]) | S_00B84C_TGID_Y_EN(info->cs.uses_block_id[1]) | S_00B84C_TGID_Z_EN(info->cs.uses_block_id[2]) | S_00B84C_TIDIG_COMP_CNT(info->cs.uses_thread_id[2] ? 2 : info->cs.uses_thread_id[1] ? 1 : 0) | - S_00B84C_TG_SIZE_EN(info->cs.uses_local_invocation_idx) | - S_00B84C_LDS_SIZE(config->lds_size) | S_00B84C_EXCP_EN(excp_en); + S_00B84C_TG_SIZE_EN(info->cs.uses_local_invocation_idx) | S_00B84C_LDS_SIZE(config->lds_size) | + S_00B84C_EXCP_EN(excp_en); config->rsrc3 |= S_00B8A0_SHARED_VGPR_CNT(num_shared_vgpr_blocks); break; @@ -1594,8 +1532,8 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi } if (pdevice->rad_info.gfx_level >= GFX10 && info->is_ngg && - (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL || - stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_MESH)) { + (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL || stage == MESA_SHADER_GEOMETRY || + stage == MESA_SHADER_MESH)) { unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt; gl_shader_stage es_stage = stage; if (stage == MESA_SHADER_GEOMETRY) @@ -1624,13 +1562,12 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi /* TES only needs vertex offset 2 for triangles or quads. 
*/ if (stage == MESA_SHADER_TESS_EVAL) - need_gs_vtx_offset2 &= info->tes._primitive_mode == TESS_PRIMITIVE_TRIANGLES || - info->tes._primitive_mode == TESS_PRIMITIVE_QUADS; + need_gs_vtx_offset2 &= + info->tes._primitive_mode == TESS_PRIMITIVE_TRIANGLES || info->tes._primitive_mode == TESS_PRIMITIVE_QUADS; if (info->uses_invocation_id) { gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID. */ - } else if (info->uses_prim_id || (es_stage == MESA_SHADER_VERTEX && - info->outinfo.export_prim_id)) { + } else if (info->uses_prim_id || (es_stage == MESA_SHADER_VERTEX && info->outinfo.export_prim_id)) { gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */ } else if (need_gs_vtx_offset2) { gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */ @@ -1643,8 +1580,7 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi * disable exactly 1 CU per SA for GS. */ config->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | S_00B228_WGP_MODE(wgp_mode); - config->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) | - S_00B22C_LDS_SIZE(config->lds_size) | + config->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) | S_00B22C_LDS_SIZE(config->lds_size) | S_00B22C_OC_LDS_EN(es_stage == MESA_SHADER_TESS_EVAL); } else if (pdevice->rad_info.gfx_level >= GFX9 && stage == MESA_SHADER_GEOMETRY) { unsigned es_type = info->gs.es_type; @@ -1677,8 +1613,8 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi } config->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | S_00B228_WGP_MODE(wgp_mode); - config->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) | - S_00B22C_OC_LDS_EN(es_type == MESA_SHADER_TESS_EVAL); + config->rsrc2 |= + S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) | S_00B22C_OC_LDS_EN(es_type == MESA_SHADER_TESS_EVAL); } else if (pdevice->rad_info.gfx_level >= GFX9 && stage == MESA_SHADER_TESS_CTRL) { config->rsrc1 |= S_00B428_LS_VGPR_COMP_CNT(vgpr_comp_cnt); } else { @@ -1748,17 +1684,16 @@ radv_shader_binary_upload(struct radv_device *device, const struct radv_shader_b } static VkResult -radv_shader_dma_resize_upload_buf(struct radv_shader_dma_submission *submission, - struct radeon_winsys *ws, uint64_t size) +radv_shader_dma_resize_upload_buf(struct radv_shader_dma_submission *submission, struct radeon_winsys *ws, + uint64_t size) { if (submission->bo) ws->buffer_destroy(ws, submission->bo); - VkResult result = - ws->buffer_create(ws, size, RADV_SHADER_ALLOC_ALIGNMENT, RADEON_DOMAIN_GTT, - RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC, - RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &submission->bo); + VkResult result = ws->buffer_create( + ws, size, RADV_SHADER_ALLOC_ALIGNMENT, RADEON_DOMAIN_GTT, + RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC, + RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &submission->bo); if (result != VK_SUCCESS) return result; @@ -1778,8 +1713,7 @@ radv_shader_dma_pop_submission(struct radv_device *device) while (list_is_empty(&device->shader_dma_submissions)) cnd_wait(&device->shader_dma_submission_list_cond, &device->shader_dma_submission_list_mutex); - submission = - list_first_entry(&device->shader_dma_submissions, struct radv_shader_dma_submission, list); + submission = list_first_entry(&device->shader_dma_submissions, struct radv_shader_dma_submission, list); list_del(&submission->list); mtx_unlock(&device->shader_dma_submission_list_mutex); @@ -1788,8 +1722,7 @@ radv_shader_dma_pop_submission(struct 
radv_device *device) } void -radv_shader_dma_push_submission(struct radv_device *device, - struct radv_shader_dma_submission *submission, uint64_t seq) +radv_shader_dma_push_submission(struct radv_device *device, struct radv_shader_dma_submission *submission, uint64_t seq) { submission->seq = seq; @@ -1802,8 +1735,7 @@ radv_shader_dma_push_submission(struct radv_device *device, } struct radv_shader_dma_submission * -radv_shader_dma_get_submission(struct radv_device *device, struct radeon_winsys_bo *bo, uint64_t va, - uint64_t size) +radv_shader_dma_get_submission(struct radv_device *device, struct radeon_winsys_bo *bo, uint64_t va, uint64_t size) { struct radv_shader_dma_submission *submission = radv_shader_dma_pop_submission(device); struct radeon_cmdbuf *cs = submission->cs; @@ -1871,8 +1803,7 @@ radv_shader_dma_submit(struct radv_device *device, struct radv_shader_dma_submis }; result = ws->cs_submit(device->shader_upload_hw_ctx, &submit, 0, NULL, 1, &signal_info); - if (result != VK_SUCCESS) - { + if (result != VK_SUCCESS) { mtx_unlock(&device->shader_upload_hw_ctx_mutex); radv_shader_dma_push_submission(device, submission, 0); return false; @@ -1893,7 +1824,6 @@ radv_shader_dma_submit(struct radv_device *device, struct radv_shader_dma_submis return true; } - struct radv_shader * radv_shader_create(struct radv_device *device, const struct radv_shader_binary *binary) { @@ -1901,8 +1831,7 @@ radv_shader_create(struct radv_device *device, const struct radv_shader_binary * if (!shader) return NULL; - vk_pipeline_cache_object_init(&device->vk, &shader->base, &radv_shader_ops, shader->sha1, - SHA1_DIGEST_LENGTH); + vk_pipeline_cache_object_init(&device->vk, &shader->base, &radv_shader_ops, shader->sha1, SHA1_DIGEST_LENGTH); shader->info = binary->info; @@ -1977,8 +1906,7 @@ radv_shader_part_binary_upload(struct radv_device *device, const struct radv_sha if (device->shader_use_invisible_vram) { uint64_t va = radv_buffer_get_va(shader_part->alloc->arena->bo) + shader_part->alloc->offset; - submission = - radv_shader_dma_get_submission(device, shader_part->alloc->arena->bo, va, code_size); + submission = radv_shader_dma_get_submission(device, shader_part->alloc->arena->bo, va, code_size); if (!submission) return false; @@ -2002,8 +1930,7 @@ radv_shader_part_binary_upload(struct radv_device *device, const struct radv_sha } struct radv_shader_part * -radv_shader_part_create(struct radv_device *device, struct radv_shader_part_binary *binary, - unsigned wave_size) +radv_shader_part_create(struct radv_device *device, struct radv_shader_part_binary *binary, unsigned wave_size) { uint32_t code_size = radv_get_shader_binary_size(binary->code_size); struct radv_shader_part *shader_part; @@ -2014,10 +1941,9 @@ radv_shader_part_create(struct radv_device *device, struct radv_shader_part_bina shader_part->ref_count = 1; shader_part->code_size = code_size; - shader_part->rsrc1 = S_00B848_VGPRS((binary->num_vgprs - 1) / (wave_size == 32 ? 8 : 4)) | - S_00B228_SGPRS((binary->num_sgprs - 1) / 8); - shader_part->disasm_string = - binary->disasm_size ? strdup((const char *)(binary->data + binary->code_size)) : NULL; + shader_part->rsrc1 = + S_00B848_VGPRS((binary->num_vgprs - 1) / (wave_size == 32 ? 8 : 4)) | S_00B228_SGPRS((binary->num_sgprs - 1) / 8); + shader_part->disasm_string = binary->disasm_size ? 
strdup((const char *)(binary->data + binary->code_size)) : NULL; shader_part->spi_shader_col_format = binary->info.spi_shader_col_format; @@ -2063,10 +1989,9 @@ radv_dump_nir_shaders(struct nir_shader *const *shaders, int shader_count) } static void -radv_aco_build_shader_binary(void **bin, const struct ac_shader_config *config, - const char *llvm_ir_str, unsigned llvm_ir_size, const char *disasm_str, - unsigned disasm_size, uint32_t *statistics, uint32_t stats_size, - uint32_t exec_size, const uint32_t *code, uint32_t code_dw, +radv_aco_build_shader_binary(void **bin, const struct ac_shader_config *config, const char *llvm_ir_str, + unsigned llvm_ir_size, const char *disasm_str, unsigned disasm_size, uint32_t *statistics, + uint32_t stats_size, uint32_t exec_size, const uint32_t *code, uint32_t code_dw, const struct aco_symbol *symbols, unsigned num_symbols) { struct radv_shader_binary **binary = (struct radv_shader_binary **)bin; @@ -2090,31 +2015,28 @@ radv_aco_build_shader_binary(void **bin, const struct ac_shader_config *config, memcpy(legacy_binary->data, statistics, stats_size); legacy_binary->stats_size = stats_size; - memcpy(legacy_binary->data + legacy_binary->stats_size, code, - code_dw * sizeof(uint32_t)); + memcpy(legacy_binary->data + legacy_binary->stats_size, code, code_dw * sizeof(uint32_t)); legacy_binary->exec_size = exec_size; legacy_binary->code_size = code_dw * sizeof(uint32_t); legacy_binary->disasm_size = 0; legacy_binary->ir_size = llvm_ir_size; - memcpy((char*)legacy_binary->data + legacy_binary->stats_size + legacy_binary->code_size, - llvm_ir_str, llvm_ir_size); + memcpy((char *)legacy_binary->data + legacy_binary->stats_size + legacy_binary->code_size, llvm_ir_str, + llvm_ir_size); legacy_binary->disasm_size = disasm_size; if (disasm_size) { - memcpy((char*)legacy_binary->data + legacy_binary->stats_size + - legacy_binary->code_size + llvm_ir_size, disasm_str, - disasm_size); + memcpy((char *)legacy_binary->data + legacy_binary->stats_size + legacy_binary->code_size + llvm_ir_size, + disasm_str, disasm_size); } - *binary = (struct radv_shader_binary*)legacy_binary; + *binary = (struct radv_shader_binary *)legacy_binary; } static void -radv_fill_nir_compiler_options(struct radv_nir_compiler_options *options, - struct radv_device *device, const struct radv_pipeline_key *key, - bool should_use_wgp, bool can_dump_shader, bool is_meta_shader, - bool keep_shader_info, bool keep_statistic_info) +radv_fill_nir_compiler_options(struct radv_nir_compiler_options *options, struct radv_device *device, + const struct radv_pipeline_key *key, bool should_use_wgp, bool can_dump_shader, + bool is_meta_shader, bool keep_shader_info, bool keep_statistic_info) { if (key) options->key = *key; @@ -2123,8 +2045,7 @@ radv_fill_nir_compiler_options(struct radv_nir_compiler_options *options, options->wgp_mode = should_use_wgp; options->info = &device->physical_device->rad_info; options->dump_shader = can_dump_shader; - options->dump_preoptir = - options->dump_shader && device->instance->debug_flags & RADV_DEBUG_PREOPTIR; + options->dump_preoptir = options->dump_shader && device->instance->debug_flags & RADV_DEBUG_PREOPTIR; options->record_ir = keep_shader_info; options->record_stats = keep_statistic_info; options->check_ir = device->instance->debug_flags & RADV_DEBUG_CHECKIR; @@ -2153,13 +2074,11 @@ radv_capture_shader_executable_info(struct radv_device *device, struct radv_shad const char *disasm_data; size_t disasm_size; - if (!ac_rtld_get_section_by_name(&rtld_binary, 
".AMDGPU.disasm", &disasm_data, - &disasm_size)) { + if (!ac_rtld_get_section_by_name(&rtld_binary, ".AMDGPU.disasm", &disasm_data, &disasm_size)) { return; } - shader->ir_string = - bin->llvm_ir_size ? strdup((const char *)(bin->data + bin->elf_size)) : NULL; + shader->ir_string = bin->llvm_ir_size ? strdup((const char *)(bin->data + bin->elf_size)) : NULL; shader->disasm_string = malloc(disasm_size + 1); memcpy(shader->disasm_string, disasm_data, disasm_size); shader->disasm_string[disasm_size] = 0; @@ -2169,19 +2088,16 @@ radv_capture_shader_executable_info(struct radv_device *device, struct radv_shad } else { struct radv_shader_binary_legacy *bin = (struct radv_shader_binary_legacy *)binary; - shader->ir_string = - bin->ir_size ? strdup((const char *)(bin->data + bin->stats_size + bin->code_size)) : NULL; + shader->ir_string = bin->ir_size ? strdup((const char *)(bin->data + bin->stats_size + bin->code_size)) : NULL; shader->disasm_string = - bin->disasm_size - ? strdup((const char *)(bin->data + bin->stats_size + bin->code_size + bin->ir_size)) - : NULL; + bin->disasm_size ? strdup((const char *)(bin->data + bin->stats_size + bin->code_size + bin->ir_size)) : NULL; } } static struct radv_shader_binary * -shader_compile(struct radv_device *device, struct nir_shader *const *shaders, int shader_count, - gl_shader_stage stage, const struct radv_shader_info *info, - const struct radv_shader_args *args, struct radv_nir_compiler_options *options) +shader_compile(struct radv_device *device, struct nir_shader *const *shaders, int shader_count, gl_shader_stage stage, + const struct radv_shader_info *info, const struct radv_shader_args *args, + struct radv_nir_compiler_options *options) { struct radv_shader_debug_data debug_data = { .device = device, @@ -2206,7 +2122,8 @@ shader_compile(struct radv_device *device, struct nir_shader *const *shaders, in struct aco_compiler_options ac_opts; radv_aco_convert_opts(&ac_opts, options, args); radv_aco_convert_shader_info(&ac_info, info, args, &options->key); - aco_compile_shader(&ac_opts, &ac_info, shader_count, shaders, &args->ac, &radv_aco_build_shader_binary, (void **)&binary); + aco_compile_shader(&ac_opts, &ac_info, shader_count, shaders, &args->ac, &radv_aco_build_shader_binary, + (void **)&binary); } binary->info = *info; @@ -2221,18 +2138,17 @@ shader_compile(struct radv_device *device, struct nir_shader *const *shaders, in struct radv_shader * radv_shader_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache, - struct radv_pipeline_stage *pl_stage, struct nir_shader *const *shaders, - int shader_count, const struct radv_pipeline_key *key, bool keep_shader_info, - bool keep_statistic_info, struct radv_shader_binary **binary_out) + struct radv_pipeline_stage *pl_stage, struct nir_shader *const *shaders, int shader_count, + const struct radv_pipeline_key *key, bool keep_shader_info, bool keep_statistic_info, + struct radv_shader_binary **binary_out) { gl_shader_stage stage = shaders[shader_count - 1]->info.stage; struct radv_shader_info *info = &pl_stage->info; struct radv_nir_compiler_options options = {0}; - radv_fill_nir_compiler_options( - &options, device, key, radv_should_use_wgp_mode(device, stage, info), - radv_can_dump_shader(device, shaders[0], false), is_meta_shader(shaders[0]), keep_shader_info, - keep_statistic_info); + radv_fill_nir_compiler_options(&options, device, key, radv_should_use_wgp_mode(device, stage, info), + radv_can_dump_shader(device, shaders[0], false), is_meta_shader(shaders[0]), + keep_shader_info, 
keep_statistic_info); struct radv_shader_binary *binary = shader_compile(device, shaders, shader_count, stage, info, &pl_stage->args, &options); @@ -2272,8 +2188,7 @@ radv_create_trap_handler_shader(struct radv_device *device) struct radv_shader_info info = {0}; struct radv_pipeline_key key = {0}; struct radv_nir_compiler_options options = {0}; - radv_fill_nir_compiler_options(&options, device, &key, - radv_should_use_wgp_mode(device, stage, &info), false, false, + radv_fill_nir_compiler_options(&options, device, &key, radv_should_use_wgp_mode(device, stage, &info), false, false, false, false); nir_builder b = radv_meta_init_shader(device, stage, "meta_trap_handler"); @@ -2281,11 +2196,9 @@ radv_create_trap_handler_shader(struct radv_device *device) info.wave_size = 64; struct radv_shader_args args; - radv_declare_shader_args(device, &key, &info, stage, MESA_SHADER_NONE, - RADV_SHADER_TYPE_TRAP_HANDLER, &args); + radv_declare_shader_args(device, &key, &info, stage, MESA_SHADER_NONE, RADV_SHADER_TYPE_TRAP_HANDLER, &args); - struct radv_shader_binary *binary = - shader_compile(device, &b.shader, 1, stage, &info, &args, &options); + struct radv_shader_binary *binary = shader_compile(device, &b.shader, 1, stage, &info, &args, &options); struct radv_shader *shader = radv_shader_create(device, binary); ralloc_free(b.shader); @@ -2294,13 +2207,9 @@ radv_create_trap_handler_shader(struct radv_device *device) return shader; } -static void radv_aco_build_shader_part(void **bin, - uint32_t num_sgprs, - uint32_t num_vgprs, - const uint32_t *code, - uint32_t code_size, - const char *disasm_str, - uint32_t disasm_size) +static void +radv_aco_build_shader_part(void **bin, uint32_t num_sgprs, uint32_t num_vgprs, const uint32_t *code, uint32_t code_size, + const char *disasm_str, uint32_t disasm_size) { struct radv_shader_part_binary **binary = (struct radv_shader_part_binary **)bin; size_t size = code_size * sizeof(uint32_t) + sizeof(struct radv_shader_part_binary); @@ -2314,8 +2223,7 @@ static void radv_aco_build_shader_part(void **bin, part_binary->code_size = code_size * sizeof(uint32_t); memcpy(part_binary->data, code, part_binary->code_size); if (disasm_size) { - memcpy((char*)part_binary->data + part_binary->code_size, - disasm_str, disasm_size); + memcpy((char *)part_binary->data + part_binary->code_size, disasm_str, disasm_size); part_binary->disasm_size = disasm_size; } @@ -2367,8 +2275,8 @@ radv_create_rt_prolog(struct radv_device *device) struct aco_compiler_options ac_opts; radv_aco_convert_shader_info(&ac_info, &info, &in_args, &options.key); radv_aco_convert_opts(&ac_opts, &options, &in_args); - aco_compile_rt_prolog(&ac_opts, &ac_info, &in_args.ac, &out_args.ac, - &radv_aco_build_shader_binary, (void **)&binary); + aco_compile_rt_prolog(&ac_opts, &ac_info, &in_args.ac, &out_args.ac, &radv_aco_build_shader_binary, + (void **)&binary); binary->info = info; radv_postprocess_binary_config(device, binary, &in_args); @@ -2414,10 +2322,9 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke struct radv_pipeline_key pipeline_key = {0}; - radv_declare_shader_args( - device, &pipeline_key, &info, key->next_stage, - key->next_stage != MESA_SHADER_VERTEX ? MESA_SHADER_VERTEX : MESA_SHADER_NONE, - RADV_SHADER_TYPE_DEFAULT, &args); + radv_declare_shader_args(device, &pipeline_key, &info, key->next_stage, + key->next_stage != MESA_SHADER_VERTEX ? 
MESA_SHADER_VERTEX : MESA_SHADER_NONE, + RADV_SHADER_TYPE_DEFAULT, &args); info.user_sgprs_locs = args.user_sgprs_locs; info.inline_push_constant_mask = args.ac.inline_push_const_mask; @@ -2434,8 +2341,7 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke radv_aco_convert_shader_info(&ac_info, &info, &args, &options.key); radv_aco_convert_opts(&ac_opts, &options, &args); radv_aco_convert_vs_prolog_key(&ac_prolog_info, key, &args); - aco_compile_vs_prolog(&ac_opts, &ac_info, &ac_prolog_info, &args.ac, &radv_aco_build_shader_part, - (void **)&binary); + aco_compile_vs_prolog(&ac_opts, &ac_info, &ac_prolog_info, &args.ac, &radv_aco_build_shader_part, (void **)&binary); prolog = radv_shader_part_create(device, binary, info.wave_size); if (!prolog) @@ -2487,8 +2393,7 @@ radv_create_ps_epilog(struct radv_device *device, const struct radv_ps_epilog_ke radv_aco_convert_shader_info(&ac_info, &info, &args, &options.key); radv_aco_convert_opts(&ac_opts, &options, &args); radv_aco_convert_ps_epilog_key(&ac_epilog_info, key, &args); - aco_compile_ps_epilog(&ac_opts, &ac_info, &ac_epilog_info, &args.ac, &radv_aco_build_shader_part, - (void **)&binary); + aco_compile_ps_epilog(&ac_opts, &ac_info, &ac_epilog_info, &args.ac, &radv_aco_build_shader_part, (void **)&binary); binary->info.spi_shader_col_format = key->spi_shader_col_format; @@ -2540,14 +2445,12 @@ struct radv_shader * radv_find_shader(struct radv_device *device, uint64_t pc) { mtx_lock(&device->shader_arena_mutex); - list_for_each_entry(struct radv_shader_arena, arena, &device->shader_arenas, list) - { + list_for_each_entry (struct radv_shader_arena, arena, &device->shader_arenas, list) { #ifdef __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wshadow" #endif - list_for_each_entry(union radv_shader_arena_block, block, &arena->entries, list) - { + list_for_each_entry (union radv_shader_arena_block, block, &arena->entries, list) { #ifdef __GNUC__ #pragma GCC diagnostic pop #endif @@ -2561,8 +2464,7 @@ radv_find_shader(struct radv_device *device, uint64_t pc) if (!shader) continue; - if (pc >= shader->va && - pc < shader->va + align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT)) + if (pc >= shader->va && pc < shader->va + align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT)) return shader; } } @@ -2623,8 +2525,7 @@ radv_get_shader_name(const struct radv_shader_info *info, gl_shader_stage stage) } unsigned -radv_get_max_waves(const struct radv_device *device, struct radv_shader *shader, - gl_shader_stage stage) +radv_get_max_waves(const struct radv_device *device, struct radv_shader *shader, gl_shader_stage stage) { const struct radeon_info *info = &device->physical_device->rad_info; const enum amd_gfx_level gfx_level = info->gfx_level; @@ -2636,13 +2537,11 @@ radv_get_max_waves(const struct radv_device *device, struct radv_shader *shader, max_simd_waves = info->max_wave64_per_simd * (64 / wave_size); if (stage == MESA_SHADER_FRAGMENT) { - lds_per_wave = - conf->lds_size * info->lds_encode_granularity + shader->info.ps.num_interp * 48; + lds_per_wave = conf->lds_size * info->lds_encode_granularity + shader->info.ps.num_interp * 48; lds_per_wave = align(lds_per_wave, info->lds_alloc_granularity); } else if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_TASK) { unsigned max_workgroup_size = shader->info.workgroup_size; - lds_per_wave = - align(conf->lds_size * info->lds_encode_granularity, info->lds_alloc_granularity); + lds_per_wave = align(conf->lds_size * info->lds_encode_granularity, 
info->lds_alloc_granularity); lds_per_wave /= DIV_ROUND_UP(max_workgroup_size, wave_size); } @@ -2673,8 +2572,7 @@ radv_get_max_waves(const struct radv_device *device, struct radv_shader *shader, } unsigned -radv_compute_spi_ps_input(const struct radv_pipeline_key *pipeline_key, - const struct radv_shader_info *info) +radv_compute_spi_ps_input(const struct radv_pipeline_key *pipeline_key, const struct radv_shader_info *info) { unsigned spi_ps_input; @@ -2683,12 +2581,11 @@ radv_compute_spi_ps_input(const struct radv_pipeline_key *pipeline_key, S_0286CC_PERSP_SAMPLE_ENA(info->ps.reads_persp_sample) | S_0286CC_LINEAR_CENTER_ENA(info->ps.reads_linear_center) | S_0286CC_LINEAR_CENTROID_ENA(info->ps.reads_linear_centroid) | - S_0286CC_LINEAR_SAMPLE_ENA(info->ps.reads_linear_sample)| + S_0286CC_LINEAR_SAMPLE_ENA(info->ps.reads_linear_sample) | S_0286CC_PERSP_PULL_MODEL_ENA(info->ps.reads_barycentric_model) | S_0286CC_FRONT_FACE_ENA(info->ps.reads_front_face); - if (info->ps.reads_frag_coord_mask || - info->ps.reads_sample_pos_mask) { + if (info->ps.reads_frag_coord_mask || info->ps.reads_sample_pos_mask) { uint8_t mask = info->ps.reads_frag_coord_mask | info->ps.reads_sample_pos_mask; for (unsigned i = 0; i < 4; i++) { @@ -2723,8 +2620,8 @@ radv_compute_spi_ps_input(const struct radv_pipeline_key *pipeline_key, } VkResult -radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipeline, - struct radv_shader *shader, gl_shader_stage stage, FILE *output) +radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipeline, struct radv_shader *shader, + gl_shader_stage stage, FILE *output) { VkPipelineExecutablePropertiesKHR *props = NULL; uint32_t prop_count = 0; @@ -2734,8 +2631,7 @@ radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipelin pipeline_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR; pipeline_info.pipeline = radv_pipeline_to_handle(pipeline); - result = radv_GetPipelineExecutablePropertiesKHR(radv_device_to_handle(device), &pipeline_info, - &prop_count, NULL); + result = radv_GetPipelineExecutablePropertiesKHR(radv_device_to_handle(device), &pipeline_info, &prop_count, NULL); if (result != VK_SUCCESS) return result; @@ -2743,8 +2639,7 @@ radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipelin if (!props) return VK_ERROR_OUT_OF_HOST_MEMORY; - result = radv_GetPipelineExecutablePropertiesKHR(radv_device_to_handle(device), &pipeline_info, - &prop_count, props); + result = radv_GetPipelineExecutablePropertiesKHR(radv_device_to_handle(device), &pipeline_info, &prop_count, props); if (result != VK_SUCCESS) goto fail; @@ -2759,8 +2654,7 @@ radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipelin exec_info.pipeline = radv_pipeline_to_handle(pipeline); exec_info.executableIndex = exec_idx; - result = radv_GetPipelineExecutableStatisticsKHR(radv_device_to_handle(device), &exec_info, - &stat_count, NULL); + result = radv_GetPipelineExecutableStatisticsKHR(radv_device_to_handle(device), &exec_info, &stat_count, NULL); if (result != VK_SUCCESS) goto fail; @@ -2770,8 +2664,7 @@ radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipelin goto fail; } - result = radv_GetPipelineExecutableStatisticsKHR(radv_device_to_handle(device), &exec_info, - &stat_count, stats); + result = radv_GetPipelineExecutableStatisticsKHR(radv_device_to_handle(device), &exec_info, &stat_count, stats); if (result != VK_SUCCESS) { free(stats); goto fail; diff --git a/src/amd/vulkan/radv_shader.h 
b/src/amd/vulkan/radv_shader.h index 9c403e2..e984db2 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -182,13 +182,12 @@ enum radv_ud_index { AC_UD_MAX_UD = AC_UD_CS_MAX_UD, }; -#define SET_SGPR_FIELD(field, value) \ - (((unsigned)(value) & field##__MASK) << field##__SHIFT) +#define SET_SGPR_FIELD(field, value) (((unsigned)(value)&field##__MASK) << field##__SHIFT) -#define TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS__SHIFT 0 -#define TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS__MASK 0x3f -#define TCS_OFFCHIP_LAYOUT_NUM_PATCHES__SHIFT 6 -#define TCS_OFFCHIP_LAYOUT_NUM_PATCHES__MASK 0xff +#define TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS__SHIFT 0 +#define TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS__MASK 0x3f +#define TCS_OFFCHIP_LAYOUT_NUM_PATCHES__SHIFT 6 +#define TCS_OFFCHIP_LAYOUT_NUM_PATCHES__MASK 0xff #define PS_STATE_NUM_SAMPLES__SHIFT 0 #define PS_STATE_NUM_SAMPLES__MASK 0xf @@ -196,8 +195,8 @@ enum radv_ud_index { #define PS_STATE_LINE_RAST_MODE__MASK 0x3 #define PS_STATE_PS_ITER_MASK__SHIFT 6 #define PS_STATE_PS_ITER_MASK__MASK 0xffff -#define PS_STATE_RAST_PRIM__SHIFT 22 -#define PS_STATE_RAST_PRIM__MASK 0x3 +#define PS_STATE_RAST_PRIM__SHIFT 22 +#define PS_STATE_RAST_PRIM__MASK 0x3 struct radv_streamout_info { uint16_t num_outputs; @@ -476,8 +475,7 @@ struct radv_shader_binary_legacy { * where the +2 is for 0 of the ir strings. */ uint8_t data[0]; }; -static_assert(sizeof(struct radv_shader_binary_legacy) == - offsetof(struct radv_shader_binary_legacy, data), +static_assert(sizeof(struct radv_shader_binary_legacy) == offsetof(struct radv_shader_binary_legacy, data), "Unexpected padding"); struct radv_shader_binary_rtld { @@ -574,13 +572,11 @@ struct radv_pipeline_stage; void radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively); void radv_optimize_nir_algebraic(nir_shader *shader, bool opt_offsets); -void radv_postprocess_nir(struct radv_device *device, - const struct radv_pipeline_layout *pipeline_layout, +void radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_layout *pipeline_layout, const struct radv_pipeline_key *pipeline_key, unsigned last_vgt_api_stage, struct radv_pipeline_stage *stage); -nir_shader *radv_parse_rt_stage(struct radv_device *device, - const VkPipelineShaderStageCreateInfo *sinfo, +nir_shader *radv_parse_rt_stage(struct radv_device *device, const VkPipelineShaderStageCreateInfo *sinfo, const struct radv_pipeline_key *key); void radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, @@ -589,10 +585,8 @@ void radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateI struct radv_pipeline_stage; -nir_shader *radv_shader_spirv_to_nir(struct radv_device *device, - const struct radv_pipeline_stage *stage, - const struct radv_pipeline_key *key, - bool is_internal); +nir_shader *radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_stage *stage, + const struct radv_pipeline_key *key, bool is_internal); void radv_init_shader_arenas(struct radv_device *device); void radv_destroy_shader_arenas(struct radv_device *device); @@ -601,53 +595,42 @@ void radv_destroy_shader_upload_queue(struct radv_device *device); struct radv_shader_args; -struct radv_shader *radv_shader_create(struct radv_device *device, - const struct radv_shader_binary *binary); +struct radv_shader *radv_shader_create(struct radv_device *device, const struct radv_shader_binary *binary); -struct radv_shader *radv_shader_create_cached(struct radv_device 
*device, - struct vk_pipeline_cache *cache, +struct radv_shader *radv_shader_create_cached(struct radv_device *device, struct vk_pipeline_cache *cache, const struct radv_shader_binary *binary); -struct radv_shader * -radv_shader_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache, - struct radv_pipeline_stage *stage, struct nir_shader *const *shaders, - int shader_count, const struct radv_pipeline_key *key, bool keep_shader_info, - bool keep_statistic_info, struct radv_shader_binary **binary_out); +struct radv_shader *radv_shader_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache, + struct radv_pipeline_stage *stage, struct nir_shader *const *shaders, + int shader_count, const struct radv_pipeline_key *key, bool keep_shader_info, + bool keep_statistic_info, struct radv_shader_binary **binary_out); VkResult radv_shader_wait_for_upload(struct radv_device *device, uint64_t seq); -struct radv_shader_dma_submission * -radv_shader_dma_pop_submission(struct radv_device *device); +struct radv_shader_dma_submission *radv_shader_dma_pop_submission(struct radv_device *device); -void radv_shader_dma_push_submission(struct radv_device *device, - struct radv_shader_dma_submission *submission, +void radv_shader_dma_push_submission(struct radv_device *device, struct radv_shader_dma_submission *submission, uint64_t seq); -struct radv_shader_dma_submission *radv_shader_dma_get_submission(struct radv_device *device, - struct radeon_winsys_bo *bo, - uint64_t va, uint64_t size); +struct radv_shader_dma_submission * +radv_shader_dma_get_submission(struct radv_device *device, struct radeon_winsys_bo *bo, uint64_t va, uint64_t size); -bool radv_shader_dma_submit(struct radv_device *device, - struct radv_shader_dma_submission *submission, +bool radv_shader_dma_submit(struct radv_device *device, struct radv_shader_dma_submission *submission, uint64_t *upload_seq_out); -union radv_shader_arena_block *radv_alloc_shader_memory(struct radv_device *device, uint32_t size, - void *ptr); +union radv_shader_arena_block *radv_alloc_shader_memory(struct radv_device *device, uint32_t size, void *ptr); void radv_free_shader_memory(struct radv_device *device, union radv_shader_arena_block *alloc); struct radv_shader *radv_create_trap_handler_shader(struct radv_device *device); struct radv_shader *radv_create_rt_prolog(struct radv_device *device); -struct radv_shader_part *radv_shader_part_create(struct radv_device *device, - struct radv_shader_part_binary *binary, +struct radv_shader_part *radv_shader_part_create(struct radv_device *device, struct radv_shader_part_binary *binary, unsigned wave_size); -struct radv_shader_part *radv_create_vs_prolog(struct radv_device *device, - const struct radv_vs_prolog_key *key); +struct radv_shader_part *radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_key *key); -struct radv_shader_part *radv_create_ps_epilog(struct radv_device *device, - const struct radv_ps_epilog_key *key, +struct radv_shader_part *radv_create_ps_epilog(struct radv_device *device, const struct radv_ps_epilog_key *key, struct radv_shader_part_binary **binary_out); void radv_shader_part_destroy(struct radv_device *device, struct radv_shader_part *shader_part); @@ -655,20 +638,18 @@ void radv_shader_part_destroy(struct radv_device *device, struct radv_shader_par uint64_t radv_shader_get_va(const struct radv_shader *shader); struct radv_shader *radv_find_shader(struct radv_device *device, uint64_t pc); -unsigned radv_get_max_waves(const struct radv_device 
*device, struct radv_shader *shader, - gl_shader_stage stage); +unsigned radv_get_max_waves(const struct radv_device *device, struct radv_shader *shader, gl_shader_stage stage); const char *radv_get_shader_name(const struct radv_shader_info *info, gl_shader_stage stage); -unsigned radv_compute_spi_ps_input(const struct radv_pipeline_key *pipeline_key, - const struct radv_shader_info *info); +unsigned radv_compute_spi_ps_input(const struct radv_pipeline_key *pipeline_key, const struct radv_shader_info *info); bool radv_can_dump_shader(struct radv_device *device, nir_shader *nir, bool meta_shader); bool radv_can_dump_shader_stats(struct radv_device *device, nir_shader *nir); -VkResult radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipeline, - struct radv_shader *shader, gl_shader_stage stage, FILE *output); +VkResult radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipeline, struct radv_shader *shader, + gl_shader_stage stage, FILE *output); extern const struct vk_pipeline_cache_object_ops radv_shader_ops; @@ -714,9 +695,8 @@ get_tcs_input_vertex_stride(unsigned tcs_num_inputs) } static inline unsigned -calculate_tess_lds_size(enum amd_gfx_level gfx_level, unsigned tcs_num_input_vertices, - unsigned tcs_num_output_vertices, unsigned tcs_num_inputs, - unsigned tcs_num_patches, unsigned tcs_num_outputs, +calculate_tess_lds_size(enum amd_gfx_level gfx_level, unsigned tcs_num_input_vertices, unsigned tcs_num_output_vertices, + unsigned tcs_num_inputs, unsigned tcs_num_patches, unsigned tcs_num_outputs, unsigned tcs_num_patch_outputs) { unsigned input_vertex_size = get_tcs_input_vertex_stride(tcs_num_inputs); @@ -743,9 +723,8 @@ calculate_tess_lds_size(enum amd_gfx_level gfx_level, unsigned tcs_num_input_ver } static inline unsigned -get_tcs_num_patches(unsigned tcs_num_input_vertices, unsigned tcs_num_output_vertices, - unsigned tcs_num_inputs, unsigned tcs_num_outputs, - unsigned tcs_num_patch_outputs, unsigned tess_offchip_block_dw_size, +get_tcs_num_patches(unsigned tcs_num_input_vertices, unsigned tcs_num_output_vertices, unsigned tcs_num_inputs, + unsigned tcs_num_outputs, unsigned tcs_num_patch_outputs, unsigned tess_offchip_block_dw_size, enum amd_gfx_level gfx_level, enum radeon_family family) { uint32_t input_vertex_size = get_tcs_input_vertex_stride(tcs_num_inputs); @@ -793,14 +772,12 @@ get_tcs_num_patches(unsigned tcs_num_input_vertices, unsigned tcs_num_output_ver void radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_stage, const struct radv_pipeline_key *pl_key); -bool radv_consider_culling(const struct radv_physical_device *pdevice, struct nir_shader *nir, - uint64_t ps_inputs_read, unsigned num_vertices_per_primitive, - const struct radv_shader_info *info); +bool radv_consider_culling(const struct radv_physical_device *pdevice, struct nir_shader *nir, uint64_t ps_inputs_read, + unsigned num_vertices_per_primitive, const struct radv_shader_info *info); void radv_get_nir_options(struct radv_physical_device *device); -nir_shader *radv_build_traversal_shader(struct radv_device *device, - struct radv_ray_tracing_pipeline *pipeline, +nir_shader *radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, const struct radv_pipeline_key *key); diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index fdcb149..6904245 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ 
b/src/amd/vulkan/radv_shader_args.c @@ -38,8 +38,7 @@ struct user_sgpr_info { }; static void -allocate_inline_push_consts(const struct radv_shader_info *info, - struct user_sgpr_info *user_sgpr_info) +allocate_inline_push_consts(const struct radv_shader_info *info, struct user_sgpr_info *user_sgpr_info) { uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs; @@ -52,8 +51,8 @@ allocate_inline_push_consts(const struct radv_shader_info *info, /* Disable the default push constants path if all constants can be inlined and if shaders don't * use dynamic descriptors. */ - if (num_push_consts <= MIN2(remaining_sgprs + 1, AC_MAX_INLINE_PUSH_CONSTS) && - info->can_inline_all_push_constants && !info->loads_dynamic_offsets) { + if (num_push_consts <= MIN2(remaining_sgprs + 1, AC_MAX_INLINE_PUSH_CONSTS) && info->can_inline_all_push_constants && + !info->loads_dynamic_offsets) { user_sgpr_info->inlined_all_push_consts = true; remaining_sgprs++; } else { @@ -85,8 +84,7 @@ add_ud_arg(struct radv_shader_args *args, unsigned size, enum ac_arg_type type, } static void -add_descriptor_set(struct radv_shader_args *args, enum ac_arg_type type, struct ac_arg *arg, - uint32_t set) +add_descriptor_set(struct radv_shader_args *args, enum ac_arg_type type, struct ac_arg *arg, uint32_t set) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, type, arg); @@ -99,8 +97,7 @@ add_descriptor_set(struct radv_shader_args *args, enum ac_arg_type type, struct } static void -declare_global_input_sgprs(const struct radv_shader_info *info, - const struct user_sgpr_info *user_sgpr_info, +declare_global_input_sgprs(const struct radv_shader_info *info, const struct user_sgpr_info *user_sgpr_info, struct radv_shader_args *args) { if (user_sgpr_info) { @@ -114,8 +111,7 @@ declare_global_input_sgprs(const struct radv_shader_info *info, add_descriptor_set(args, AC_ARG_CONST_PTR, &args->descriptor_sets[i], i); } } else { - add_ud_arg(args, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0], - AC_UD_INDIRECT_DESCRIPTOR_SETS); + add_ud_arg(args, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0], AC_UD_INDIRECT_DESCRIPTOR_SETS); } if (info->loads_push_constants && !user_sgpr_info->inlined_all_push_consts) { @@ -124,8 +120,7 @@ declare_global_input_sgprs(const struct radv_shader_info *info, } for (unsigned i = 0; i < util_bitcount64(user_sgpr_info->inline_push_constant_mask); i++) { - add_ud_arg(args, 1, AC_ARG_INT, &args->ac.inline_push_consts[i], - AC_UD_INLINE_PUSH_CONSTANTS); + add_ud_arg(args, 1, AC_ARG_INT, &args->ac.inline_push_consts[i], AC_UD_INLINE_PUSH_CONSTANTS); } args->ac.inline_push_const_mask = user_sgpr_info->inline_push_constant_mask; } @@ -145,8 +140,7 @@ declare_vs_specific_input_sgprs(const struct radv_shader_info *info, struct radv if (args->type != RADV_SHADER_TYPE_GS_COPY && (stage == MESA_SHADER_VERTEX || previous_stage == MESA_SHADER_VERTEX)) { if (info->vs.vb_desc_usage_mask) { - add_ud_arg(args, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers, - AC_UD_VS_VERTEX_BUFFERS); + add_ud_arg(args, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers, AC_UD_VS_VERTEX_BUFFERS); } add_ud_arg(args, 1, AC_ARG_INT, &args->ac.base_vertex, AC_UD_VS_BASE_VERTEX_START_INSTANCE); @@ -154,15 +148,14 @@ declare_vs_specific_input_sgprs(const struct radv_shader_info *info, struct radv add_ud_arg(args, 1, AC_ARG_INT, &args->ac.draw_id, AC_UD_VS_BASE_VERTEX_START_INSTANCE); } if (info->vs.needs_base_instance) { - add_ud_arg(args, 1, AC_ARG_INT, &args->ac.start_instance, - AC_UD_VS_BASE_VERTEX_START_INSTANCE); + add_ud_arg(args, 1, AC_ARG_INT, 
&args->ac.start_instance, AC_UD_VS_BASE_VERTEX_START_INSTANCE); } } } static void -declare_vs_input_vgprs(enum amd_gfx_level gfx_level, const struct radv_shader_info *info, - struct radv_shader_args *args, bool merged_vs_tcs) +declare_vs_input_vgprs(enum amd_gfx_level gfx_level, const struct radv_shader_info *info, struct radv_shader_args *args, + bool merged_vs_tcs) { ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id); if (args->type != RADV_SHADER_TYPE_GS_COPY) { @@ -215,8 +208,7 @@ declare_vs_input_vgprs(enum amd_gfx_level gfx_level, const struct radv_shader_in } static void -declare_streamout_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, - gl_shader_stage stage) +declare_streamout_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, gl_shader_stage stage) { int i; @@ -294,8 +286,8 @@ declare_ps_input_vgprs(const struct radv_shader_info *info, struct radv_shader_a } static void -declare_ngg_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, - bool has_ngg_query, bool has_ngg_provoking_vtx) +declare_ngg_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, bool has_ngg_query, + bool has_ngg_provoking_vtx) { if (has_ngg_query) add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_query_state, AC_UD_NGG_QUERY_STATE); @@ -313,8 +305,8 @@ declare_ngg_sgprs(const struct radv_shader_info *info, struct radv_shader_args * } static void -radv_init_shader_args(const struct radv_device *device, gl_shader_stage stage, - enum radv_shader_type type, struct radv_shader_args *args) +radv_init_shader_args(const struct radv_device *device, gl_shader_stage stage, enum radv_shader_type type, + struct radv_shader_args *args) { memset(args, 0, sizeof(*args)); @@ -333,8 +325,7 @@ void radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, struct radv_shader_args *args) { add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.shader_pc, AC_UD_SCRATCH_RING_OFFSETS); - add_ud_arg(args, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0], - AC_UD_INDIRECT_DESCRIPTOR_SETS); + add_ud_arg(args, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0], AC_UD_INDIRECT_DESCRIPTOR_SETS); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants); ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ac.rt.sbt_descriptors); ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.traversal_shader); @@ -376,8 +367,7 @@ radv_ps_needs_state_sgpr(const struct radv_shader_info *info, const struct radv_ if (key->dynamic_line_rast_mode) return true; - if (info->ps.reads_sample_mask_in && - (info->ps.uses_sample_shading || key->ps.sample_shading_enable)) + if (info->ps.reads_sample_mask_in && (info->ps.uses_sample_shading || key->ps.sample_shading_enable)) return true; /* For computing barycentrics when the primitive topology is unknown at compile time (GPL). 
*/ @@ -389,16 +379,15 @@ radv_ps_needs_state_sgpr(const struct radv_shader_info *info, const struct radv_ static void declare_shader_args(const struct radv_device *device, const struct radv_pipeline_key *key, - const struct radv_shader_info *info, gl_shader_stage stage, - gl_shader_stage previous_stage, enum radv_shader_type type, - struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info) + const struct radv_shader_info *info, gl_shader_stage stage, gl_shader_stage previous_stage, + enum radv_shader_type type, struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info) { const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level; bool needs_view_index = info->uses_view_index; bool has_ngg_query = info->has_ngg_prim_query || info->has_ngg_xfb_query || (stage == MESA_SHADER_GEOMETRY && info->gs.has_ngg_pipeline_stat_query); - bool has_ngg_provoking_vtx = (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_GEOMETRY) && - key->dynamic_provoking_vtx_mode; + bool has_ngg_provoking_vtx = + (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_GEOMETRY) && key->dynamic_provoking_vtx_mode; if (gfx_level >= GFX10 && info->is_ngg && stage != MESA_SHADER_GEOMETRY) { /* Handle all NGG shaders as GS to simplify the code here. */ @@ -415,8 +404,7 @@ declare_shader_args(const struct radv_device *device, const struct radv_pipeline add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->ac.ring_offsets, AC_UD_SCRATCH_RING_OFFSETS); if (stage == MESA_SHADER_TASK) { - add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->task_ring_offsets, - AC_UD_CS_TASK_RING_OFFSETS); + add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->task_ring_offsets, AC_UD_CS_TASK_RING_OFFSETS); } /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including @@ -442,12 +430,9 @@ declare_shader_args(const struct radv_device *device, const struct radv_pipeline } if (info->cs.is_rt_shader) { - add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->ac.rt.sbt_descriptors, - AC_UD_CS_SBT_DESCRIPTORS); - add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.traversal_shader, - AC_UD_CS_TRAVERSAL_SHADER_ADDR); - add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.launch_size_addr, - AC_UD_CS_RAY_LAUNCH_SIZE_ADDR); + add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->ac.rt.sbt_descriptors, AC_UD_CS_SBT_DESCRIPTORS); + add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.traversal_shader, AC_UD_CS_TRAVERSAL_SHADER_ADDR); + add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.launch_size_addr, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR); add_ud_arg(args, 1, AC_ARG_INT, &args->ac.rt.dynamic_callable_stack_base, AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE); } @@ -702,9 +687,8 @@ declare_shader_args(const struct radv_device *device, const struct radv_pipeline void radv_declare_shader_args(const struct radv_device *device, const struct radv_pipeline_key *key, - const struct radv_shader_info *info, gl_shader_stage stage, - gl_shader_stage previous_stage, enum radv_shader_type type, - struct radv_shader_args *args) + const struct radv_shader_info *info, gl_shader_stage stage, gl_shader_stage previous_stage, + enum radv_shader_type type, struct radv_shader_args *args) { declare_shader_args(device, key, info, stage, previous_stage, type, args, NULL); @@ -716,8 +700,7 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_pip num_user_sgprs++; const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level; - uint32_t available_sgprs = - gfx_level >= GFX9 && stage != 
MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16; + uint32_t available_sgprs = gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16; uint32_t remaining_sgprs = available_sgprs - num_user_sgprs; struct user_sgpr_info user_sgpr_info = { diff --git a/src/amd/vulkan/radv_shader_args.h b/src/amd/vulkan/radv_shader_args.h index cc4eb82..809031a 100644 --- a/src/amd/vulkan/radv_shader_args.h +++ b/src/amd/vulkan/radv_shader_args.h @@ -102,8 +102,7 @@ void radv_declare_shader_args(const struct radv_device *device, const struct rad gl_shader_stage previous_stage, enum radv_shader_type type, struct radv_shader_args *args); -void radv_declare_ps_epilog_args(const struct radv_device *device, - const struct radv_ps_epilog_key *key, +void radv_declare_ps_epilog_args(const struct radv_device *device, const struct radv_ps_epilog_key *key, struct radv_shader_args *args); void radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, struct radv_shader_args *args); diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index 66a4065..0638728 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -34,8 +34,7 @@ mark_sampler_desc(const nir_variable *var, struct radv_shader_info *info) } static void -gather_intrinsic_load_input_info(const nir_shader *nir, const nir_intrinsic_instr *instr, - struct radv_shader_info *info) +gather_intrinsic_load_input_info(const nir_shader *nir, const nir_intrinsic_instr *instr, struct radv_shader_info *info) { switch (nir->info.stage) { case MESA_SHADER_VERTEX: { @@ -112,8 +111,7 @@ gather_intrinsic_store_output_info(const nir_shader *nir, const nir_intrinsic_in } static void -gather_push_constant_info(const nir_shader *nir, const nir_intrinsic_instr *instr, - struct radv_shader_info *info) +gather_push_constant_info(const nir_shader *nir, const nir_intrinsic_instr *instr, struct radv_shader_info *info) { info->loads_push_constants = true; @@ -131,8 +129,8 @@ gather_push_constant_info(const nir_shader *nir, const nir_intrinsic_instr *inst } static void -gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr, - struct radv_shader_info *info, bool consider_force_vrs) +gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr, struct radv_shader_info *info, + bool consider_force_vrs) { switch (instr->intrinsic) { case nir_intrinsic_load_barycentric_sample: @@ -211,8 +209,7 @@ gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr, case nir_intrinsic_image_deref_atomic_swap: case nir_intrinsic_image_deref_size: case nir_intrinsic_image_deref_samples: { - nir_variable *var = - nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); + nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); mark_sampler_desc(var, info); break; } @@ -257,8 +254,7 @@ gather_tex_info(const nir_shader *nir, const nir_tex_instr *instr, struct radv_s } static void -gather_info_block(const nir_shader *nir, const nir_block *block, struct radv_shader_info *info, - bool consider_force_vrs) +gather_info_block(const nir_shader *nir, const nir_block *block, struct radv_shader_info *info, bool consider_force_vrs) { nir_foreach_instr (instr, block) { switch (instr->type) { @@ -320,27 +316,26 @@ gather_xfb_info(const nir_shader *nir, struct radv_shader_info *info) } static void -assign_outinfo_param(struct radv_vs_output_info *outinfo, gl_varying_slot idx, - unsigned 
*total_param_exports, unsigned extra_offset) +assign_outinfo_param(struct radv_vs_output_info *outinfo, gl_varying_slot idx, unsigned *total_param_exports, + unsigned extra_offset) { if (outinfo->vs_output_param_offset[idx] == AC_EXP_PARAM_UNDEFINED) outinfo->vs_output_param_offset[idx] = extra_offset + (*total_param_exports)++; } static void -assign_outinfo_params(struct radv_vs_output_info *outinfo, uint64_t mask, - unsigned *total_param_exports, unsigned extra_offset) +assign_outinfo_params(struct radv_vs_output_info *outinfo, uint64_t mask, unsigned *total_param_exports, + unsigned extra_offset) { - u_foreach_bit64(idx, mask) { - if (idx >= VARYING_SLOT_VAR0 || idx == VARYING_SLOT_LAYER || - idx == VARYING_SLOT_PRIMITIVE_ID || idx == VARYING_SLOT_VIEWPORT) + u_foreach_bit64 (idx, mask) { + if (idx >= VARYING_SLOT_VAR0 || idx == VARYING_SLOT_LAYER || idx == VARYING_SLOT_PRIMITIVE_ID || + idx == VARYING_SLOT_VIEWPORT) assign_outinfo_param(outinfo, idx, total_param_exports, extra_offset); } } static uint8_t -radv_get_wave_size(struct radv_device *device, gl_shader_stage stage, - const struct radv_shader_info *info) +radv_get_wave_size(struct radv_device *device, gl_shader_stage stage, const struct radv_shader_info *info) { if (stage == MESA_SHADER_GEOMETRY && !info->is_ngg) return 64; @@ -357,8 +352,7 @@ radv_get_wave_size(struct radv_device *device, gl_shader_stage stage, } static uint8_t -radv_get_ballot_bit_size(struct radv_device *device, gl_shader_stage stage, - const struct radv_shader_info *info) +radv_get_ballot_bit_size(struct radv_device *device, gl_shader_stage stage, const struct radv_shader_info *info) { if (stage == MESA_SHADER_COMPUTE && info->cs.subgroup_size) return info->cs.subgroup_size; @@ -381,8 +375,7 @@ gather_info_input_decl_vs(const nir_shader *nir, unsigned location, const struct else info->vs.vb_desc_usage_mask |= BITFIELD_BIT(key->vs.vertex_attribute_bindings[location]); - info->vs.input_slot_usage_mask |= - BITFIELD_RANGE(location, glsl_count_attribute_slots(type, false)); + info->vs.input_slot_usage_mask |= BITFIELD_RANGE(location, glsl_count_attribute_slots(type, false)); } else if (glsl_type_is_matrix(type) || glsl_type_is_array(type)) { const struct glsl_type *elem = glsl_get_array_element(type); unsigned stride = glsl_count_attribute_slots(elem, false); @@ -401,8 +394,8 @@ gather_info_input_decl_vs(const nir_shader *nir, unsigned location, const struct } static void -gather_shader_info_vs(struct radv_device *device, const nir_shader *nir, - const struct radv_pipeline_key *pipeline_key, struct radv_shader_info *info) +gather_shader_info_vs(struct radv_device *device, const nir_shader *nir, const struct radv_pipeline_key *pipeline_key, + struct radv_shader_info *info) { if (pipeline_key->vs.has_prolog && nir->info.inputs_read) { info->vs.has_prolog = true; @@ -419,9 +412,8 @@ gather_shader_info_vs(struct radv_device *device, const nir_shader *nir, info->vs.needs_base_instance |= info->vs.has_prolog; info->vs.needs_draw_id |= info->vs.has_prolog; - nir_foreach_shader_in_variable(var, nir) - gather_info_input_decl_vs(nir, var->data.location - VERT_ATTRIB_GENERIC0, var->type, - pipeline_key, info); + nir_foreach_shader_in_variable (var, nir) + gather_info_input_decl_vs(nir, var->data.location - VERT_ATTRIB_GENERIC0, var->type, pipeline_key, info); if (info->vs.dynamic_inputs) info->vs.vb_desc_usage_mask = BITFIELD_MASK(util_last_bit(info->vs.vb_desc_usage_mask)); @@ -435,8 +427,8 @@ gather_shader_info_vs(struct radv_device *device, const nir_shader *nir, } 
static void -gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir, - const struct radv_pipeline_key *pipeline_key, struct radv_shader_info *info) +gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir, const struct radv_pipeline_key *pipeline_key, + struct radv_shader_info *info) { info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out; @@ -445,18 +437,14 @@ gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir, info->num_tess_patches = get_tcs_num_patches(pipeline_key->tcs.tess_input_vertices, nir->info.tess.tcs_vertices_out, info->tcs.num_linked_inputs, info->tcs.num_linked_outputs, - info->tcs.num_linked_patch_outputs, - device->physical_device->hs.tess_offchip_block_dw_size, - device->physical_device->rad_info.gfx_level, - device->physical_device->rad_info.family); + info->tcs.num_linked_patch_outputs, device->physical_device->hs.tess_offchip_block_dw_size, + device->physical_device->rad_info.gfx_level, device->physical_device->rad_info.family); /* LDS size used by VS+TCS for storing TCS inputs and outputs. */ info->tcs.num_lds_blocks = - calculate_tess_lds_size(device->physical_device->rad_info.gfx_level, - pipeline_key->tcs.tess_input_vertices, - nir->info.tess.tcs_vertices_out, info->tcs.num_linked_inputs, - info->num_tess_patches, info->tcs.num_linked_outputs, - info->tcs.num_linked_patch_outputs); + calculate_tess_lds_size(device->physical_device->rad_info.gfx_level, pipeline_key->tcs.tess_input_vertices, + nir->info.tess.tcs_vertices_out, info->tcs.num_linked_inputs, info->num_tess_patches, + info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs); } } @@ -481,10 +469,9 @@ gather_shader_info_gs(const nir_shader *nir, struct radv_shader_info *info) info->gs.input_prim = nir->info.gs.input_primitive; info->gs.output_prim = nir->info.gs.output_primitive; info->gs.invocations = nir->info.gs.invocations; - info->gs.max_stream = - nir->info.gs.active_stream_mask ? util_last_bit(nir->info.gs.active_stream_mask) - 1 : 0; + info->gs.max_stream = nir->info.gs.active_stream_mask ? 
util_last_bit(nir->info.gs.active_stream_mask) - 1 : 0; - nir_foreach_shader_out_variable(var, nir) { + nir_foreach_shader_out_variable (var, nir) { unsigned num_components = glsl_get_component_slots(var->type); unsigned stream = var->data.stream; @@ -535,12 +522,10 @@ gather_shader_info_mesh(const nir_shader *nir, struct radv_shader_info *info) ngg_info->prim_amp_factor = nir->info.mesh.max_primitives_out; ngg_info->vgt_esgs_ring_itemsize = 1; - unsigned min_ngg_workgroup_size = - ac_compute_ngg_workgroup_size(ngg_info->hw_max_esverts, ngg_info->max_gsprims, - ngg_info->max_out_verts, ngg_info->prim_amp_factor); + unsigned min_ngg_workgroup_size = ac_compute_ngg_workgroup_size(ngg_info->hw_max_esverts, ngg_info->max_gsprims, + ngg_info->max_out_verts, ngg_info->prim_amp_factor); - unsigned api_workgroup_size = - ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX); + unsigned api_workgroup_size = ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX); info->workgroup_size = MAX2(min_ngg_workgroup_size, api_workgroup_size); } @@ -565,11 +550,11 @@ gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir, } info->ps.can_discard = nir->info.fs.uses_discard; - info->ps.early_fragment_test = nir->info.fs.early_fragment_tests || - (nir->info.fs.early_and_late_fragment_tests && - nir->info.fs.depth_layout == FRAG_DEPTH_LAYOUT_NONE && - nir->info.fs.stencil_front_layout == FRAG_STENCIL_LAYOUT_NONE && - nir->info.fs.stencil_back_layout == FRAG_STENCIL_LAYOUT_NONE); + info->ps.early_fragment_test = + nir->info.fs.early_fragment_tests || + (nir->info.fs.early_and_late_fragment_tests && nir->info.fs.depth_layout == FRAG_DEPTH_LAYOUT_NONE && + nir->info.fs.stencil_front_layout == FRAG_STENCIL_LAYOUT_NONE && + nir->info.fs.stencil_back_layout == FRAG_STENCIL_LAYOUT_NONE); info->ps.post_depth_coverage = nir->info.fs.post_depth_coverage; info->ps.depth_layout = nir->info.fs.depth_layout; info->ps.uses_sample_shading = nir->info.fs.uses_sample_shading; @@ -586,20 +571,15 @@ gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir, info->ps.reads_frag_shading_rate = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_SHADING_RATE); info->ps.reads_front_face = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE); info->ps.reads_barycentric_model = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL); - info->ps.reads_fully_covered = - BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FULLY_COVERED); + info->ps.reads_fully_covered = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FULLY_COVERED); - bool uses_persp_or_linear_interp = info->ps.reads_persp_center || - info->ps.reads_persp_centroid || - info->ps.reads_persp_sample || - info->ps.reads_linear_center || - info->ps.reads_linear_centroid || - info->ps.reads_linear_sample; + bool uses_persp_or_linear_interp = info->ps.reads_persp_center || info->ps.reads_persp_centroid || + info->ps.reads_persp_sample || info->ps.reads_linear_center || + info->ps.reads_linear_centroid || info->ps.reads_linear_sample; info->ps.allow_flat_shading = - !(uses_persp_or_linear_interp || info->ps.needs_sample_positions || - info->ps.reads_frag_shading_rate || info->ps.writes_memory || - nir->info.fs.needs_quad_helper_invocations || + !(uses_persp_or_linear_interp || info->ps.needs_sample_positions || info->ps.reads_frag_shading_rate || + info->ps.writes_memory || nir->info.fs.needs_quad_helper_invocations || 
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) || BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD) || BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) || @@ -611,17 +591,15 @@ gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir, info->ps.has_epilog = pipeline_key->ps.has_epilog && info->ps.colors_written; - info->ps.writes_mrt0_alpha = - (pipeline_key->ps.alpha_to_coverage_via_mrtz && (info->ps.color0_written & 0x8)) && - (info->ps.writes_z || info->ps.writes_stencil || info->ps.writes_sample_mask); + info->ps.writes_mrt0_alpha = (pipeline_key->ps.alpha_to_coverage_via_mrtz && (info->ps.color0_written & 0x8)) && + (info->ps.writes_z || info->ps.writes_stencil || info->ps.writes_sample_mask); info->ps.mrt0_is_dual_src = pipeline_key->ps.epilog.mrt0_is_dual_src; info->ps.spi_shader_col_format = pipeline_key->ps.epilog.spi_shader_col_format; - nir_foreach_shader_in_variable(var, nir) { - const struct glsl_type *type = - var->data.per_vertex ? glsl_get_array_element(var->type) : var->type; + nir_foreach_shader_in_variable (var, nir) { + const struct glsl_type *type = var->data.per_vertex ? glsl_get_array_element(var->type) : var->type; unsigned attrib_count = glsl_count_attribute_slots(type, false); int idx = var->data.location; @@ -673,8 +651,8 @@ gather_shader_info_rt(const nir_shader *nir, struct radv_shader_info *info) } static void -gather_shader_info_cs(struct radv_device *device, const nir_shader *nir, - const struct radv_pipeline_key *pipeline_key, struct radv_shader_info *info) +gather_shader_info_cs(struct radv_device *device, const nir_shader *nir, const struct radv_pipeline_key *pipeline_key, + struct radv_shader_info *info) { info->cs.uses_ray_launch_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_RAY_LAUNCH_SIZE_ADDR_AMD); @@ -689,8 +667,7 @@ gather_shader_info_cs(struct radv_device *device, const nir_shader *nir, if (!subgroup_size) subgroup_size = default_wave_size; - unsigned local_size = - nir->info.workgroup_size[0] * nir->info.workgroup_size[1] * nir->info.workgroup_size[2]; + unsigned local_size = nir->info.workgroup_size[0] * nir->info.workgroup_size[1] * nir->info.workgroup_size[2]; /* Games don't always request full subgroups when they should, which can cause bugs if cswave32 * is enabled. @@ -707,8 +684,7 @@ gather_shader_info_cs(struct radv_device *device, const nir_shader *nir, info->cs.subgroup_size = subgroup_size; if (device->physical_device->rad_info.has_cs_regalloc_hang_bug) { - info->cs.regalloc_hang_bug = - info->cs.block_size[0] * info->cs.block_size[1] * info->cs.block_size[2] > 256; + info->cs.regalloc_hang_bug = info->cs.block_size[0] * info->cs.block_size[1] * info->cs.block_size[2] > 256; } } @@ -731,8 +707,8 @@ gather_shader_info_task(const nir_shader *nir, struct radv_shader_info *info) /* Task->Mesh dispatch is linear when Y = Z = 1. * GFX11 CP can optimize this case with a field in its draw packets. */ - info->cs.linear_taskmesh_dispatch = nir->info.mesh.ts_mesh_dispatch_dimensions[1] == 1 && - nir->info.mesh.ts_mesh_dispatch_dimensions[2] == 1; + info->cs.linear_taskmesh_dispatch = + nir->info.mesh.ts_mesh_dispatch_dimensions[1] == 1 && nir->info.mesh.ts_mesh_dispatch_dimensions[2] == 1; } static uint32_t @@ -772,11 +748,9 @@ radv_get_user_data_0(const struct radv_device *device, struct radv_shader_info * assert(info->stage != MESA_SHADER_MESH); return R_00B130_SPI_SHADER_USER_DATA_VS_0; case MESA_SHADER_TESS_CTRL: - return gfx_level == GFX9 ? 
R_00B430_SPI_SHADER_USER_DATA_LS_0 - : R_00B430_SPI_SHADER_USER_DATA_HS_0; + return gfx_level == GFX9 ? R_00B430_SPI_SHADER_USER_DATA_LS_0 : R_00B430_SPI_SHADER_USER_DATA_HS_0; case MESA_SHADER_GEOMETRY: - return gfx_level == GFX9 ? R_00B330_SPI_SHADER_USER_DATA_ES_0 - : R_00B230_SPI_SHADER_USER_DATA_GS_0; + return gfx_level == GFX9 ? R_00B330_SPI_SHADER_USER_DATA_ES_0 : R_00B230_SPI_SHADER_USER_DATA_GS_0; case MESA_SHADER_FRAGMENT: return R_00B030_SPI_SHADER_USER_DATA_PS_0; case MESA_SHADER_COMPUTE: @@ -803,12 +777,9 @@ radv_nir_shader_info_init(struct radv_shader_info *info) } void -radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir, - gl_shader_stage next_stage, - const struct radv_pipeline_layout *layout, - const struct radv_pipeline_key *pipeline_key, - const enum radv_pipeline_type pipeline_type, - bool consider_force_vrs, +radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir, gl_shader_stage next_stage, + const struct radv_pipeline_layout *layout, const struct radv_pipeline_key *pipeline_key, + const enum radv_pipeline_type pipeline_type, bool consider_force_vrs, struct radv_shader_info *info) { info->stage = nir->info.stage; @@ -838,10 +809,8 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n uint64_t special_mask = BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_COUNT) | BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES) | BITFIELD64_BIT(VARYING_SLOT_CULL_PRIMITIVE); - uint64_t per_prim_mask = - nir->info.outputs_written & nir->info.per_primitive_outputs & ~special_mask; - uint64_t per_vtx_mask = - nir->info.outputs_written & ~nir->info.per_primitive_outputs & ~special_mask; + uint64_t per_prim_mask = nir->info.outputs_written & nir->info.per_primitive_outputs & ~special_mask; + uint64_t per_vtx_mask = nir->info.outputs_written & ~nir->info.per_primitive_outputs & ~special_mask; /* Mesh multivew is only lowered in ac_nir_lower_ngg, so we have to fake it here. */ if (nir->info.stage == MESA_SHADER_MESH && pipeline_key->has_multiview_view_index) { @@ -882,8 +851,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n outinfo->pos_exports = util_bitcount(pos_written); - memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED, - sizeof(outinfo->vs_output_param_offset)); + memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED, sizeof(outinfo->vs_output_param_offset)); unsigned total_param_exports = 0; @@ -961,18 +929,15 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n switch (nir->info.stage) { case MESA_SHADER_COMPUTE: case MESA_SHADER_TASK: - info->workgroup_size = - ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX); + info->workgroup_size = ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX); /* Allow the compiler to assume that the shader always has full subgroups, * meaning that the initial EXEC mask is -1 in all waves (all lanes enabled). * This assumption is incorrect for ray tracing and internal (meta) shaders * because they can use unaligned dispatch. */ - info->cs.uses_full_subgroups = - pipeline_type != RADV_PIPELINE_RAY_TRACING && - !nir->info.internal && - (info->workgroup_size % info->wave_size) == 0; + info->cs.uses_full_subgroups = pipeline_type != RADV_PIPELINE_RAY_TRACING && !nir->info.internal && + (info->workgroup_size % info->wave_size) == 0; break; case MESA_SHADER_MESH: /* Already computed in gather_shader_info_mesh(). 
*/ @@ -1030,8 +995,8 @@ radv_get_legacy_gs_info(const struct radv_device *device, struct radv_pipeline_s struct radv_legacy_gs_info *out = &gs_stage->info.gs_ring_info; const unsigned gs_num_invocations = MAX2(gs_info->gs.invocations, 1); - const bool uses_adjacency = gs_info->gs.input_prim == MESA_PRIM_LINES_ADJACENCY || - gs_info->gs.input_prim == MESA_PRIM_TRIANGLES_ADJACENCY; + const bool uses_adjacency = + gs_info->gs.input_prim == MESA_PRIM_LINES_ADJACENCY || gs_info->gs.input_prim == MESA_PRIM_TRIANGLES_ADJACENCY; /* All these are in dwords: */ /* We can't allow using the whole LDS, because GS waves compete with @@ -1056,8 +1021,7 @@ radv_get_legacy_gs_info(const struct radv_device *device, struct radv_pipeline_s * Make sure we don't go over the maximum value. */ if (gs_info->gs.vertices_out > 0) { - max_gs_prims = - MIN2(max_gs_prims, max_out_prims / (gs_info->gs.vertices_out * gs_num_invocations)); + max_gs_prims = MIN2(max_gs_prims, max_out_prims / (gs_info->gs.vertices_out * gs_num_invocations)); } assert(max_gs_prims > 0); @@ -1123,8 +1087,8 @@ radv_get_legacy_gs_info(const struct radv_device *device, struct radv_pipeline_s out->vgt_esgs_ring_itemsize = esgs_itemsize; assert(max_prims_per_subgroup <= max_out_prims); - unsigned workgroup_size = ac_compute_esgs_workgroup_size(gfx_level, es_info->wave_size, - es_verts_per_subgroup, gs_inst_prims_in_subgroup); + unsigned workgroup_size = + ac_compute_esgs_workgroup_size(gfx_level, es_info->wave_size, es_verts_per_subgroup, gs_inst_prims_in_subgroup); es_info->workgroup_size = workgroup_size; gs_info->workgroup_size = workgroup_size; @@ -1132,8 +1096,7 @@ radv_get_legacy_gs_info(const struct radv_device *device, struct radv_pipeline_s } static void -clamp_gsprims_to_esverts(unsigned *max_gsprims, unsigned max_esverts, unsigned min_verts_per_prim, - bool use_adjacency) +clamp_gsprims_to_esverts(unsigned *max_gsprims, unsigned max_esverts, unsigned min_verts_per_prim, bool use_adjacency) { unsigned max_reuse = max_esverts - min_verts_per_prim; if (use_adjacency) @@ -1142,8 +1105,7 @@ clamp_gsprims_to_esverts(unsigned *max_gsprims, unsigned max_esverts, unsigned m } static unsigned -radv_get_num_input_vertices(const struct radv_pipeline_stage *es_stage, - const struct radv_pipeline_stage *gs_stage) +radv_get_num_input_vertices(const struct radv_pipeline_stage *es_stage, const struct radv_pipeline_stage *gs_stage) { if (gs_stage) { return gs_stage->nir->info.gs.vertices_in; @@ -1161,8 +1123,7 @@ radv_get_num_input_vertices(const struct radv_pipeline_stage *es_stage, } static unsigned -radv_get_pre_rast_input_topology(const struct radv_pipeline_stage *es_stage, - const struct radv_pipeline_stage *gs_stage) +radv_get_pre_rast_input_topology(const struct radv_pipeline_stage *es_stage, const struct radv_pipeline_stage *gs_stage) { if (gs_stage) { return gs_stage->nir->info.gs.input_primitive; @@ -1193,8 +1154,7 @@ gfx10_get_ngg_info(const struct radv_device *device, struct radv_pipeline_stage const unsigned gs_num_invocations = gs_stage ? 
MAX2(gs_info->gs.invocations, 1) : 1; const unsigned input_prim = radv_get_pre_rast_input_topology(es_stage, gs_stage); - const bool uses_adjacency = input_prim == MESA_PRIM_LINES_ADJACENCY || - input_prim == MESA_PRIM_TRIANGLES_ADJACENCY; + const bool uses_adjacency = input_prim == MESA_PRIM_LINES_ADJACENCY || input_prim == MESA_PRIM_TRIANGLES_ADJACENCY; /* All these are in dwords: */ /* We can't allow using the whole LDS, because GS waves compete with @@ -1211,7 +1171,8 @@ gfx10_get_ngg_info(const struct radv_device *device, struct radv_pipeline_stage /* All these are per subgroup: */ const unsigned min_esverts = gfx_level >= GFX11 ? 3 : /* gfx11 requires at least 1 primitive per TG */ - gfx_level >= GFX10_3 ? 29 : 24; + gfx_level >= GFX10_3 ? 29 + : 24; bool max_vert_out_per_gs_instance = false; unsigned max_esverts_base = 128; unsigned max_gsprims_base = 128; /* default prim group size clamp */ @@ -1315,8 +1276,7 @@ gfx10_get_ngg_info(const struct radv_device *device, struct radv_pipeline_stage max_esverts = align(max_esverts, wavesize); max_esverts = MIN2(max_esverts, max_esverts_base); if (esvert_lds_size) - max_esverts = - MIN2(max_esverts, (max_lds_size - max_gsprims * gsprim_lds_size) / esvert_lds_size); + max_esverts = MIN2(max_esverts, (max_lds_size - max_gsprims * gsprim_lds_size) / esvert_lds_size); max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim); /* Hardware restriction: minimum value of max_esverts */ @@ -1335,8 +1295,7 @@ gfx10_get_ngg_info(const struct radv_device *device, struct radv_pipeline_stage * for triangles. */ unsigned usable_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim); - max_gsprims = MIN2(max_gsprims, - (max_lds_size - usable_esverts * esvert_lds_size) / gsprim_lds_size); + max_gsprims = MIN2(max_gsprims, (max_lds_size - usable_esverts * esvert_lds_size) / gsprim_lds_size); } clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, uses_adjacency); assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1); @@ -1356,9 +1315,8 @@ gfx10_get_ngg_info(const struct radv_device *device, struct radv_pipeline_stage } unsigned max_out_vertices = max_vert_out_per_gs_instance ? gs_info->gs.vertices_out - : gs_stage - ? max_gsprims * gs_num_invocations * gs_info->gs.vertices_out - : max_esverts; + : gs_stage ? max_gsprims * gs_num_invocations * gs_info->gs.vertices_out + : max_esverts; assert(max_out_vertices <= 256); unsigned prim_amp_factor = 1; @@ -1396,8 +1354,7 @@ gfx10_get_ngg_info(const struct radv_device *device, struct radv_pipeline_stage assert(out->hw_max_esverts >= min_esverts); /* HW limitation */ unsigned workgroup_size = - ac_compute_ngg_workgroup_size( - max_esverts, max_gsprims * gs_num_invocations, max_out_vertices, prim_amp_factor); + ac_compute_ngg_workgroup_size(max_esverts, max_gsprims * gs_num_invocations, max_out_vertices, prim_amp_factor); if (gs_stage) { gs_info->workgroup_size = workgroup_size; } @@ -1406,21 +1363,18 @@ gfx10_get_ngg_info(const struct radv_device *device, struct radv_pipeline_stage static void gfx10_get_ngg_query_info(const struct radv_device *device, struct radv_pipeline_stage *es_stage, - struct radv_pipeline_stage *gs_stage, - const struct radv_pipeline_key *pipeline_key) + struct radv_pipeline_stage *gs_stage, const struct radv_pipeline_key *pipeline_key) { struct radv_shader_info *info = gs_stage ? 
&gs_stage->info : &es_stage->info; - info->gs.has_ngg_pipeline_stat_query = - device->physical_device->emulate_ngg_gs_query_pipeline_stat && !!gs_stage; + info->gs.has_ngg_pipeline_stat_query = device->physical_device->emulate_ngg_gs_query_pipeline_stat && !!gs_stage; info->has_ngg_xfb_query = gs_stage ? !!gs_stage->nir->xfb_info : !!es_stage->nir->xfb_info; info->has_ngg_prim_query = pipeline_key->primitives_generated_query || info->has_ngg_xfb_query; } static void radv_determine_ngg_settings(struct radv_device *device, struct radv_pipeline_stage *es_stage, - struct radv_pipeline_stage *fs_stage, - const struct radv_pipeline_key *pipeline_key) + struct radv_pipeline_stage *fs_stage, const struct radv_pipeline_key *pipeline_key) { assert(es_stage->stage == MESA_SHADER_VERTEX || es_stage->stage == MESA_SHADER_TESS_EVAL); assert(!fs_stage || fs_stage->stage == MESA_SHADER_FRAGMENT); @@ -1431,15 +1385,15 @@ radv_determine_ngg_settings(struct radv_device *device, struct radv_pipeline_sta if (es_stage->stage == MESA_SHADER_VERTEX) { num_vertices_per_prim = radv_get_num_vertices_per_prim(pipeline_key); } else if (es_stage->stage == MESA_SHADER_TESS_EVAL) { - num_vertices_per_prim = es_stage->nir->info.tess.point_mode ? 1 : - es_stage->nir->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES ? 2 : 3; + num_vertices_per_prim = es_stage->nir->info.tess.point_mode ? 1 + : es_stage->nir->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES ? 2 + : 3; } /* TODO: Enable culling for LLVM. */ - es_stage->info.has_ngg_culling = - radv_consider_culling(device->physical_device, es_stage->nir, ps_inputs_read, - num_vertices_per_prim, &es_stage->info) && - !radv_use_llvm_for_stage(device, es_stage->stage); + es_stage->info.has_ngg_culling = radv_consider_culling(device->physical_device, es_stage->nir, ps_inputs_read, + num_vertices_per_prim, &es_stage->info) && + !radv_use_llvm_for_stage(device, es_stage->stage); nir_function_impl *impl = nir_shader_get_entrypoint(es_stage->nir); es_stage->info.has_ngg_early_prim_export = exec_list_is_singular(&impl->body); @@ -1447,14 +1401,13 @@ radv_determine_ngg_settings(struct radv_device *device, struct radv_pipeline_sta /* NGG passthrough mode should be disabled when culling and when the vertex shader * exports the primitive ID. */ - es_stage->info.is_ngg_passthrough = !es_stage->info.has_ngg_culling && - !(es_stage->stage == MESA_SHADER_VERTEX && es_stage->info.outinfo.export_prim_id); + es_stage->info.is_ngg_passthrough = !es_stage->info.has_ngg_culling && !(es_stage->stage == MESA_SHADER_VERTEX && + es_stage->info.outinfo.export_prim_id); } static void -radv_link_shaders_info(struct radv_device *device, - struct radv_pipeline_stage *producer, struct radv_pipeline_stage *consumer, - const struct radv_pipeline_key *pipeline_key) +radv_link_shaders_info(struct radv_device *device, struct radv_pipeline_stage *producer, + struct radv_pipeline_stage *consumer, const struct radv_pipeline_key *pipeline_key) { /* Export primitive ID and clip/cull distances if read by the FS, or export unconditionally when * the next stage is unknown (with graphics pipeline library). 
@@ -1465,8 +1418,7 @@ radv_link_shaders_info(struct radv_device *device, const bool ps_prim_id_in = !consumer || consumer->info.ps.prim_id_input; const bool ps_clip_dists_in = !consumer || !!consumer->info.ps.num_input_clips_culls; - if (ps_prim_id_in && - (producer->stage == MESA_SHADER_VERTEX || producer->stage == MESA_SHADER_TESS_EVAL)) { + if (ps_prim_id_in && (producer->stage == MESA_SHADER_VERTEX || producer->stage == MESA_SHADER_TESS_EVAL)) { /* Mark the primitive ID as output when it's implicitly exported by VS or TES. */ if (outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] == AC_EXP_PARAM_UNDEFINED) outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = outinfo->param_exports++; @@ -1499,17 +1451,16 @@ radv_link_shaders_info(struct radv_device *device, /* Compute the ESGS item size for VS or TES as ES. */ producer->info.esgs_itemsize = num_outputs_written * 16; - /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank - * conflicts, i.e. each vertex will start on a different bank. - */ + /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank + * conflicts, i.e. each vertex will start on a different bank. + */ if (device->physical_device->rad_info.gfx_level >= GFX9 && producer->info.esgs_itemsize) producer->info.esgs_itemsize += 4; } /* Compute NGG info (GFX10+) or GS info. */ if (producer->info.is_ngg) { - struct radv_pipeline_stage *gs_stage = - consumer && consumer->stage == MESA_SHADER_GEOMETRY ? consumer : NULL; + struct radv_pipeline_stage *gs_stage = consumer && consumer->stage == MESA_SHADER_GEOMETRY ? consumer : NULL; gfx10_get_ngg_info(device, producer, gs_stage); gfx10_get_ngg_query_info(device, producer, gs_stage, pipeline_key); @@ -1523,8 +1474,7 @@ radv_link_shaders_info(struct radv_device *device, } } - if (producer->stage == MESA_SHADER_VERTEX && - consumer && consumer->stage == MESA_SHADER_TESS_CTRL) { + if (producer->stage == MESA_SHADER_VERTEX && consumer && consumer->stage == MESA_SHADER_TESS_CTRL) { struct radv_pipeline_stage *vs_stage = producer; struct radv_pipeline_stage *tcs_stage = consumer; @@ -1537,17 +1487,13 @@ radv_link_shaders_info(struct radv_device *device, vs_stage->info.workgroup_size = 256; tcs_stage->info.workgroup_size = 256; } else { - vs_stage->info.workgroup_size = - ac_compute_lshs_workgroup_size(device->physical_device->rad_info.gfx_level, - MESA_SHADER_VERTEX, tcs_stage->info.num_tess_patches, - pipeline_key->tcs.tess_input_vertices, - tcs_stage->info.tcs.tcs_vertices_out); - - tcs_stage->info.workgroup_size = - ac_compute_lshs_workgroup_size(device->physical_device->rad_info.gfx_level, - MESA_SHADER_TESS_CTRL, tcs_stage->info.num_tess_patches, - pipeline_key->tcs.tess_input_vertices, - tcs_stage->info.tcs.tcs_vertices_out); + vs_stage->info.workgroup_size = ac_compute_lshs_workgroup_size( + device->physical_device->rad_info.gfx_level, MESA_SHADER_VERTEX, tcs_stage->info.num_tess_patches, + pipeline_key->tcs.tess_input_vertices, tcs_stage->info.tcs.tcs_vertices_out); + + tcs_stage->info.workgroup_size = ac_compute_lshs_workgroup_size( + device->physical_device->rad_info.gfx_level, MESA_SHADER_TESS_CTRL, tcs_stage->info.num_tess_patches, + pipeline_key->tcs.tess_input_vertices, tcs_stage->info.tcs.tcs_vertices_out); if (!radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) { /* When the number of TCS input and output vertices are the same (typically 3): @@ -1562,16 +1508,13 @@ radv_link_shaders_info(struct radv_device *device, vs_stage->info.vs.tcs_in_out_eq = device->physical_device->rad_info.gfx_level >= GFX9 && 
pipeline_key->tcs.tess_input_vertices == tcs_stage->info.tcs.tcs_vertices_out && - vs_stage->nir->info.float_controls_execution_mode == - tcs_stage->nir->info.float_controls_execution_mode; + vs_stage->nir->info.float_controls_execution_mode == tcs_stage->nir->info.float_controls_execution_mode; if (vs_stage->info.vs.tcs_in_out_eq) vs_stage->info.vs.tcs_temp_only_input_mask = - tcs_stage->nir->info.inputs_read & - vs_stage->nir->info.outputs_written & + tcs_stage->nir->info.inputs_read & vs_stage->nir->info.outputs_written & ~tcs_stage->nir->info.tess.tcs_cross_invocation_inputs_read & - ~tcs_stage->nir->info.inputs_read_indirectly & - ~vs_stage->nir->info.outputs_accessed_indirectly; + ~tcs_stage->nir->info.inputs_read_indirectly & ~vs_stage->nir->info.outputs_accessed_indirectly; } } } @@ -1627,13 +1570,9 @@ radv_nir_shader_info_merge(const struct radv_pipeline_stage *src, struct radv_pi } static const gl_shader_stage graphics_shader_order[] = { - MESA_SHADER_VERTEX, - MESA_SHADER_TESS_CTRL, - MESA_SHADER_TESS_EVAL, - MESA_SHADER_GEOMETRY, + MESA_SHADER_VERTEX, MESA_SHADER_TESS_CTRL, MESA_SHADER_TESS_EVAL, MESA_SHADER_GEOMETRY, - MESA_SHADER_TASK, - MESA_SHADER_MESH, + MESA_SHADER_TASK, MESA_SHADER_MESH, }; void @@ -1641,8 +1580,7 @@ radv_nir_shader_info_link(struct radv_device *device, const struct radv_pipeline struct radv_pipeline_stage *stages) { /* Walk backwards to link */ - struct radv_pipeline_stage *next_stage = - stages[MESA_SHADER_FRAGMENT].nir ? &stages[MESA_SHADER_FRAGMENT] : NULL; + struct radv_pipeline_stage *next_stage = stages[MESA_SHADER_FRAGMENT].nir ? &stages[MESA_SHADER_FRAGMENT] : NULL; for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; i--) { gl_shader_stage s = graphics_shader_order[i]; @@ -1661,8 +1599,7 @@ radv_nir_shader_info_link(struct radv_device *device, const struct radv_pipeline /* Merge shader info for VS+GS or TES+GS. */ if (stages[MESA_SHADER_GEOMETRY].nir) { - gl_shader_stage pre_stage = - stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; + gl_shader_stage pre_stage = stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; radv_nir_shader_info_merge(&stages[pre_stage], &stages[MESA_SHADER_GEOMETRY]); } diff --git a/src/amd/vulkan/radv_spm.c b/src/amd/vulkan/radv_spm.c index fc81fba..6c076f1 100644 --- a/src/amd/vulkan/radv_spm.c +++ b/src/amd/vulkan/radv_spm.c @@ -34,17 +34,16 @@ radv_spm_init_bo(struct radv_device *device) { struct radeon_winsys *ws = device->ws; uint64_t size = 32 * 1024 * 1024; /* Default to 1MB. */ - uint16_t sample_interval = 4096; /* Default to 4096 clk. */ + uint16_t sample_interval = 4096; /* Default to 4096 clk. */ VkResult result; device->spm.buffer_size = size; device->spm.sample_interval = sample_interval; struct radeon_winsys_bo *bo = NULL; - result = ws->buffer_create( - ws, size, 4096, RADEON_DOMAIN_VRAM, - RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM, - RADV_BO_PRIORITY_SCRATCH, 0, &bo); + result = ws->buffer_create(ws, size, 4096, RADEON_DOMAIN_VRAM, + RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM, + RADV_BO_PRIORITY_SCRATCH, 0, &bo); device->spm.bo = bo; if (result != VK_SUCCESS) return false; @@ -95,9 +94,9 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs) } /* Restore global broadcasting. 
*/ - radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, - S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | - S_030800_INSTANCE_BROADCAST_WRITES(1)); + radeon_set_uconfig_reg( + cs, R_030800_GRBM_GFX_INDEX, + S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1)); } void @@ -114,11 +113,10 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs) /* Configure the SPM ring buffer. */ radeon_set_uconfig_reg(cs, R_037200_RLC_SPM_PERFMON_CNTL, - S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */ - S_037200_PERFMON_SAMPLE_INTERVAL(spm->sample_interval)); /* in sclk */ + S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */ + S_037200_PERFMON_SAMPLE_INTERVAL(spm->sample_interval)); /* in sclk */ radeon_set_uconfig_reg(cs, R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va); - radeon_set_uconfig_reg(cs, R_037208_RLC_SPM_PERFMON_RING_BASE_HI, - S_037208_RING_BASE_HI(va >> 32)); + radeon_set_uconfig_reg(cs, R_037208_RLC_SPM_PERFMON_RING_BASE_HI, S_037208_RING_BASE_HI(va >> 32)); radeon_set_uconfig_reg(cs, R_03720C_RLC_SPM_PERFMON_RING_SIZE, ring_size); /* Configure the muxsel. */ @@ -129,20 +127,18 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs) radeon_set_uconfig_reg(cs, R_03726C_RLC_SPM_ACCUM_MODE, 0); radeon_set_uconfig_reg(cs, R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0); - radeon_set_uconfig_reg(cs, R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE, - S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[0]) | - S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[1]) | - S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[2]) | - S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[3])); - radeon_set_uconfig_reg(cs, R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE, - S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) | - S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[4])); + radeon_set_uconfig_reg( + cs, R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE, + S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[0]) | S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[1]) | + S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[2]) | S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[3])); + radeon_set_uconfig_reg( + cs, R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE, + S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) | S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[4])); /* Upload each muxsel ram to the RLC. */ for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) { unsigned rlc_muxsel_addr, rlc_muxsel_data; - unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) | - S_030800_INSTANCE_BROADCAST_WRITES(1); + unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1); if (!spm->num_muxsel_lines[s]) continue; @@ -169,10 +165,8 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs) /* Write the muxsel line configuration with MUXSEL_DATA. 
*/ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_ME) | - S_370_WR_ONE_ADDR(1)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME) | + S_370_WR_ONE_ADDR(1)); radeon_emit(cs, rlc_muxsel_data >> 2); radeon_emit(cs, 0); radeon_emit_array(cs, data, AC_SPM_MUXSEL_LINE_SIZE); @@ -189,18 +183,18 @@ radv_spm_init(struct radv_device *device) const struct radeon_info *info = &device->physical_device->rad_info; struct ac_perfcounters *pc = &device->physical_device->ac_perfcounters; struct ac_spm_counter_create_info spm_counters[] = { - {TCP, 0, 0x9}, /* Number of L2 requests. */ - {TCP, 0, 0x12}, /* Number of L2 misses. */ - {SQ, 0, 0x14f}, /* Number of SCACHE hits. */ - {SQ, 0, 0x150}, /* Number of SCACHE misses. */ - {SQ, 0, 0x151}, /* Number of SCACHE misses duplicate. */ - {SQ, 0, 0x12c}, /* Number of ICACHE hits. */ - {SQ, 0, 0x12d}, /* Number of ICACHE misses. */ - {SQ, 0, 0x12e}, /* Number of ICACHE misses duplicate. */ - {GL1C, 0, 0xe}, /* Number of GL1C requests. */ - {GL1C, 0, 0x12}, /* Number of GL1C misses. */ - {GL2C, 0, 0x3}, /* Number of GL2C requests. */ - {GL2C, 0, info->gfx_level >= GFX10_3 ? 0x2b : 0x23}, /* Number of GL2C misses. */ + {TCP, 0, 0x9}, /* Number of L2 requests. */ + {TCP, 0, 0x12}, /* Number of L2 misses. */ + {SQ, 0, 0x14f}, /* Number of SCACHE hits. */ + {SQ, 0, 0x150}, /* Number of SCACHE misses. */ + {SQ, 0, 0x151}, /* Number of SCACHE misses duplicate. */ + {SQ, 0, 0x12c}, /* Number of ICACHE hits. */ + {SQ, 0, 0x12d}, /* Number of ICACHE misses. */ + {SQ, 0, 0x12e}, /* Number of ICACHE misses duplicate. */ + {GL1C, 0, 0xe}, /* Number of GL1C requests. */ + {GL1C, 0, 0x12}, /* Number of GL1C misses. */ + {GL2C, 0, 0x3}, /* Number of GL2C requests. */ + {GL2C, 0, info->gfx_level >= GFX10_3 ? 0x2b : 0x23}, /* Number of GL2C misses. */ }; /* We failed to initialize the performance counters. 
*/ diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c index 2a2e421..d494f72 100644 --- a/src/amd/vulkan/radv_sqtt.c +++ b/src/amd/vulkan/radv_sqtt.c @@ -38,10 +38,8 @@ radv_is_instruction_timing_enabled(void) static uint32_t gfx11_get_sqtt_ctrl(const struct radv_device *device, bool enable) { - return S_0367B0_MODE(enable) | S_0367B0_HIWATER(5) | S_0367B0_UTIL_TIMER(1) | - S_0367B0_RT_FREQ(2) | /* 4096 clk */ - S_0367B0_DRAW_EVENT_EN(1) | S_0367B0_SPI_STALL_EN(1) | S_0367B0_SQ_STALL_EN(1) | - S_0367B0_REG_AT_HWM(2); + return S_0367B0_MODE(enable) | S_0367B0_HIWATER(5) | S_0367B0_UTIL_TIMER(1) | S_0367B0_RT_FREQ(2) | /* 4096 clk */ + S_0367B0_DRAW_EVENT_EN(1) | S_0367B0_SPI_STALL_EN(1) | S_0367B0_SQ_STALL_EN(1) | S_0367B0_REG_AT_HWM(2); } static uint32_t @@ -49,9 +47,8 @@ gfx10_get_sqtt_ctrl(const struct radv_device *device, bool enable) { uint32_t sqtt_ctrl = S_008D1C_MODE(enable) | S_008D1C_HIWATER(5) | S_008D1C_UTIL_TIMER(1) | S_008D1C_RT_FREQ(2) | /* 4096 clk */ - S_008D1C_DRAW_EVENT_EN(1) | S_008D1C_REG_STALL_EN(1) | - S_008D1C_SPI_STALL_EN(1) | S_008D1C_SQ_STALL_EN(1) | - S_008D1C_REG_DROP_ON_STALL(0); + S_008D1C_DRAW_EVENT_EN(1) | S_008D1C_REG_STALL_EN(1) | S_008D1C_SPI_STALL_EN(1) | + S_008D1C_SQ_STALL_EN(1) | S_008D1C_REG_DROP_ON_STALL(0); if (device->physical_device->rad_info.gfx_level == GFX10_3) sqtt_ctrl |= S_008D1C_LOWATER_OFFSET(4); @@ -69,17 +66,14 @@ radv_emit_wait_for_idle(const struct radv_device *device, struct radeon_cmdbuf * si_cs_emit_cache_flush( device->ws, cs, device->physical_device->rad_info.gfx_level, NULL, 0, family == AMD_IP_COMPUTE && device->physical_device->rad_info.gfx_level >= GFX7, - (family == RADV_QUEUE_COMPUTE - ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH - : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) | - RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE | - RADV_CMD_FLAG_INV_L2, + (family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH + : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) | + RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2, &sqtt_flush_bits, 0); } static void -radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, - enum radv_queue_family qf) +radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf) { uint32_t shifted_size = device->sqtt.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT; const struct radeon_info *rad_info = &device->physical_device->rad_info; @@ -95,9 +89,8 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, continue; /* Target SEx and SH0. */ - radeon_set_uconfig_reg( - cs, R_030800_GRBM_GFX_INDEX, - S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1)); + radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, + S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1)); if (device->physical_device->rad_info.gfx_level >= GFX11) { /* Order seems important for the following 2 registers. 
*/ @@ -108,12 +101,11 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, radeon_set_uconfig_reg(cs, R_0367B4_SQ_THREAD_TRACE_MASK, S_0367B4_WTYPE_INCLUDE(0x7f) | /* all shader stages */ - S_0367B4_SA_SEL(0) | S_0367B4_WGP_SEL(first_active_cu / 2) | - S_0367B4_SIMD_SEL(0)); + S_0367B4_SA_SEL(0) | S_0367B4_WGP_SEL(first_active_cu / 2) | S_0367B4_SIMD_SEL(0)); - uint32_t sqtt_token_mask = S_0367B8_REG_INCLUDE( - V_0367B8_REG_INCLUDE_SQDEC | V_0367B8_REG_INCLUDE_SHDEC | V_0367B8_REG_INCLUDE_GFXUDEC | - V_0367B8_REG_INCLUDE_COMP | V_0367B8_REG_INCLUDE_CONTEXT | V_0367B8_REG_INCLUDE_CONFIG); + uint32_t sqtt_token_mask = S_0367B8_REG_INCLUDE(V_0367B8_REG_INCLUDE_SQDEC | V_0367B8_REG_INCLUDE_SHDEC | + V_0367B8_REG_INCLUDE_GFXUDEC | V_0367B8_REG_INCLUDE_COMP | + V_0367B8_REG_INCLUDE_CONTEXT | V_0367B8_REG_INCLUDE_CONFIG); /* Performance counters with SQTT are considered deprecated. */ uint32_t token_exclude = V_0367B8_TOKEN_EXCLUDE_PERF; @@ -129,34 +121,30 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, radeon_set_uconfig_reg(cs, R_0367B8_SQ_THREAD_TRACE_TOKEN_MASK, sqtt_token_mask); /* Should be emitted last (it enables thread traces). */ - radeon_set_uconfig_reg(cs, R_0367B0_SQ_THREAD_TRACE_CTRL, - gfx11_get_sqtt_ctrl(device, true)); + radeon_set_uconfig_reg(cs, R_0367B0_SQ_THREAD_TRACE_CTRL, gfx11_get_sqtt_ctrl(device, true)); } else if (device->physical_device->rad_info.gfx_level >= GFX10) { /* Order seems important for the following 2 registers. */ - radeon_set_privileged_config_reg( - cs, R_008D04_SQ_THREAD_TRACE_BUF0_SIZE, - S_008D04_SIZE(shifted_size) | S_008D04_BASE_HI(shifted_va >> 32)); + radeon_set_privileged_config_reg(cs, R_008D04_SQ_THREAD_TRACE_BUF0_SIZE, + S_008D04_SIZE(shifted_size) | S_008D04_BASE_HI(shifted_va >> 32)); radeon_set_privileged_config_reg(cs, R_008D00_SQ_THREAD_TRACE_BUF0_BASE, shifted_va); - radeon_set_privileged_config_reg( - cs, R_008D14_SQ_THREAD_TRACE_MASK, - S_008D14_WTYPE_INCLUDE(0x7f) | /* all shader stages */ - S_008D14_SA_SEL(0) | S_008D14_WGP_SEL(first_active_cu / 2) | S_008D14_SIMD_SEL(0)); + radeon_set_privileged_config_reg(cs, R_008D14_SQ_THREAD_TRACE_MASK, + S_008D14_WTYPE_INCLUDE(0x7f) | /* all shader stages */ + S_008D14_SA_SEL(0) | S_008D14_WGP_SEL(first_active_cu / 2) | + S_008D14_SIMD_SEL(0)); - uint32_t sqtt_token_mask = S_008D18_REG_INCLUDE( - V_008D18_REG_INCLUDE_SQDEC | V_008D18_REG_INCLUDE_SHDEC | V_008D18_REG_INCLUDE_GFXUDEC | - V_008D18_REG_INCLUDE_COMP | V_008D18_REG_INCLUDE_CONTEXT | V_008D18_REG_INCLUDE_CONFIG); + uint32_t sqtt_token_mask = S_008D18_REG_INCLUDE(V_008D18_REG_INCLUDE_SQDEC | V_008D18_REG_INCLUDE_SHDEC | + V_008D18_REG_INCLUDE_GFXUDEC | V_008D18_REG_INCLUDE_COMP | + V_008D18_REG_INCLUDE_CONTEXT | V_008D18_REG_INCLUDE_CONFIG); /* Performance counters with SQTT are considered deprecated. */ uint32_t token_exclude = V_008D18_TOKEN_EXCLUDE_PERF; if (!radv_is_instruction_timing_enabled()) { /* Reduce SQTT traffic when instruction timing isn't enabled. 
*/ - token_exclude |= V_008D18_TOKEN_EXCLUDE_VMEMEXEC | - V_008D18_TOKEN_EXCLUDE_ALUEXEC | - V_008D18_TOKEN_EXCLUDE_VALUINST | - V_008D18_TOKEN_EXCLUDE_IMMEDIATE | + token_exclude |= V_008D18_TOKEN_EXCLUDE_VMEMEXEC | V_008D18_TOKEN_EXCLUDE_ALUEXEC | + V_008D18_TOKEN_EXCLUDE_VALUINST | V_008D18_TOKEN_EXCLUDE_IMMEDIATE | V_008D18_TOKEN_EXCLUDE_INST; } sqtt_token_mask |= S_008D18_TOKEN_EXCLUDE(token_exclude); @@ -164,12 +152,10 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, radeon_set_privileged_config_reg(cs, R_008D18_SQ_THREAD_TRACE_TOKEN_MASK, sqtt_token_mask); /* Should be emitted last (it enables thread traces). */ - radeon_set_privileged_config_reg(cs, R_008D1C_SQ_THREAD_TRACE_CTRL, - gfx10_get_sqtt_ctrl(device, true)); + radeon_set_privileged_config_reg(cs, R_008D1C_SQ_THREAD_TRACE_CTRL, gfx10_get_sqtt_ctrl(device, true)); } else { /* Order seems important for the following 4 registers. */ - radeon_set_uconfig_reg(cs, R_030CDC_SQ_THREAD_TRACE_BASE2, - S_030CDC_ADDR_HI(shifted_va >> 32)); + radeon_set_uconfig_reg(cs, R_030CDC_SQ_THREAD_TRACE_BASE2, S_030CDC_ADDR_HI(shifted_va >> 32)); radeon_set_uconfig_reg(cs, R_030CC0_SQ_THREAD_TRACE_BASE, shifted_va); @@ -177,9 +163,8 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, radeon_set_uconfig_reg(cs, R_030CD4_SQ_THREAD_TRACE_CTRL, S_030CD4_RESET_BUFFER(1)); - uint32_t sqtt_mask = S_030CC8_CU_SEL(first_active_cu) | S_030CC8_SH_SEL(0) | - S_030CC8_SIMD_EN(0xf) | S_030CC8_VM_ID_MASK(0) | - S_030CC8_REG_STALL_EN(1) | S_030CC8_SPI_STALL_EN(1) | + uint32_t sqtt_mask = S_030CC8_CU_SEL(first_active_cu) | S_030CC8_SH_SEL(0) | S_030CC8_SIMD_EN(0xf) | + S_030CC8_VM_ID_MASK(0) | S_030CC8_REG_STALL_EN(1) | S_030CC8_SPI_STALL_EN(1) | S_030CC8_SQ_STALL_EN(1); if (device->physical_device->rad_info.gfx_level < GFX9) { @@ -189,9 +174,8 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, radeon_set_uconfig_reg(cs, R_030CC8_SQ_THREAD_TRACE_MASK, sqtt_mask); /* Trace all tokens and registers. */ - radeon_set_uconfig_reg( - cs, R_030CCC_SQ_THREAD_TRACE_TOKEN_MASK, - S_030CCC_TOKEN_MASK(0xbfff) | S_030CCC_REG_MASK(0xff) | S_030CCC_REG_DROP_ON_STALL(0)); + radeon_set_uconfig_reg(cs, R_030CCC_SQ_THREAD_TRACE_TOKEN_MASK, + S_030CCC_TOKEN_MASK(0xbfff) | S_030CCC_REG_MASK(0xff) | S_030CCC_REG_DROP_ON_STALL(0)); /* Enable SQTT perf counters for all CUs. */ radeon_set_uconfig_reg(cs, R_030CD0_SQ_THREAD_TRACE_PERF_MASK, @@ -207,11 +191,10 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, } /* Enable the thread trace mode. */ - uint32_t sqtt_mode = - S_030CD8_MASK_PS(1) | S_030CD8_MASK_VS(1) | S_030CD8_MASK_GS(1) | S_030CD8_MASK_ES(1) | - S_030CD8_MASK_HS(1) | S_030CD8_MASK_LS(1) | S_030CD8_MASK_CS(1) | - S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */ - S_030CD8_MODE(1); + uint32_t sqtt_mode = S_030CD8_MASK_PS(1) | S_030CD8_MASK_VS(1) | S_030CD8_MASK_GS(1) | S_030CD8_MASK_ES(1) | + S_030CD8_MASK_HS(1) | S_030CD8_MASK_LS(1) | S_030CD8_MASK_CS(1) | + S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */ + S_030CD8_MODE(1); if (device->physical_device->rad_info.gfx_level == GFX9) { /* Count SQTT traffic in TCC perf counters. */ @@ -223,9 +206,9 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, } /* Restore global broadcasting. 
*/ - radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, - S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | - S_030800_INSTANCE_BROADCAST_WRITES(1)); + radeon_set_uconfig_reg( + cs, R_030800_GRBM_GFX_INDEX, + S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1)); /* Start the thread trace with a different event based on the queue. */ if (qf == RADV_QUEUE_COMPUTE) { @@ -283,8 +266,7 @@ radv_copy_sqtt_info_regs(const struct radv_device *device, struct radeon_cmdbuf /* Copy back the info struct one DWORD at a time. */ for (unsigned i = 0; i < 3; i++) { radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_TC_L2) | - COPY_DATA_WR_CONFIRM); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_TC_L2) | COPY_DATA_WR_CONFIRM); radeon_emit(cs, sqtt_info_regs[i] >> 2); radeon_emit(cs, 0); /* unused */ radeon_emit(cs, (info_va + i * 4)); @@ -342,30 +324,25 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs, continue; /* Target SEi and SH0. */ - radeon_set_uconfig_reg( - cs, R_030800_GRBM_GFX_INDEX, - S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1)); + radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, + S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1)); if (device->physical_device->rad_info.gfx_level >= GFX11) { /* Make sure to wait for the trace buffer. */ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit( - cs, - WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */ + radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */ radeon_emit(cs, R_0367D0_SQ_THREAD_TRACE_STATUS >> 2); /* register */ radeon_emit(cs, 0); - radeon_emit(cs, 0); /* reference value */ + radeon_emit(cs, 0); /* reference value */ radeon_emit(cs, ~C_0367D0_FINISH_DONE); - radeon_emit(cs, 4); /* poll interval */ + radeon_emit(cs, 4); /* poll interval */ /* Disable the thread trace mode. */ - radeon_set_uconfig_reg(cs, R_0367B0_SQ_THREAD_TRACE_CTRL, - gfx11_get_sqtt_ctrl(device, false)); + radeon_set_uconfig_reg(cs, R_0367B0_SQ_THREAD_TRACE_CTRL, gfx11_get_sqtt_ctrl(device, false)); /* Wait for thread trace completion. */ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit( - cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */ + radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */ radeon_emit(cs, R_0367D0_SQ_THREAD_TRACE_STATUS >> 2); /* register */ radeon_emit(cs, 0); radeon_emit(cs, 0); /* reference value */ @@ -375,51 +352,46 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs, if (!device->physical_device->rad_info.has_sqtt_rb_harvest_bug) { /* Make sure to wait for the trace buffer. 
*/ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit( - cs, - WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */ + radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */ radeon_emit(cs, R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */ radeon_emit(cs, 0); - radeon_emit(cs, 0); /* reference value */ + radeon_emit(cs, 0); /* reference value */ radeon_emit(cs, ~C_008D20_FINISH_DONE); - radeon_emit(cs, 4); /* poll interval */ + radeon_emit(cs, 4); /* poll interval */ } /* Disable the thread trace mode. */ - radeon_set_privileged_config_reg(cs, R_008D1C_SQ_THREAD_TRACE_CTRL, - gfx10_get_sqtt_ctrl(device, false)); + radeon_set_privileged_config_reg(cs, R_008D1C_SQ_THREAD_TRACE_CTRL, gfx10_get_sqtt_ctrl(device, false)); /* Wait for thread trace completion. */ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit( - cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */ + radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */ radeon_emit(cs, R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */ radeon_emit(cs, 0); - radeon_emit(cs, 0); /* reference value */ + radeon_emit(cs, 0); /* reference value */ radeon_emit(cs, ~C_008D20_BUSY); /* mask */ - radeon_emit(cs, 4); /* poll interval */ + radeon_emit(cs, 4); /* poll interval */ } else { /* Disable the thread trace mode. */ radeon_set_uconfig_reg(cs, R_030CD8_SQ_THREAD_TRACE_MODE, S_030CD8_MODE(0)); /* Wait for thread trace completion. */ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit( - cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */ + radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */ radeon_emit(cs, R_030CE8_SQ_THREAD_TRACE_STATUS >> 2); /* register */ radeon_emit(cs, 0); - radeon_emit(cs, 0); /* reference value */ + radeon_emit(cs, 0); /* reference value */ radeon_emit(cs, ~C_030CE8_BUSY); /* mask */ - radeon_emit(cs, 4); /* poll interval */ + radeon_emit(cs, 4); /* poll interval */ } radv_copy_sqtt_info_regs(device, cs, se); } /* Restore global broadcasting. */ - radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, - S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | - S_030800_INSTANCE_BROADCAST_WRITES(1)); + radeon_set_uconfig_reg( + cs, R_030800_GRBM_GFX_INDEX, + S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1)); } void @@ -455,9 +427,8 @@ void radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf *cs, bool enable) { if (device->physical_device->rad_info.gfx_level >= GFX9) { - uint32_t spi_config_cntl = - S_031100_GPR_WRITE_PRIORITY(0x2c688) | S_031100_EXP_PRIORITY_ORDER(3) | - S_031100_ENABLE_SQG_TOP_EVENTS(enable) | S_031100_ENABLE_SQG_BOP_EVENTS(enable); + uint32_t spi_config_cntl = S_031100_GPR_WRITE_PRIORITY(0x2c688) | S_031100_EXP_PRIORITY_ORDER(3) | + S_031100_ENABLE_SQG_TOP_EVENTS(enable) | S_031100_ENABLE_SQG_BOP_EVENTS(enable); if (device->physical_device->rad_info.gfx_level >= GFX10) spi_config_cntl |= S_031100_PS_PKR_PRIORITY_CNTL(3); @@ -465,9 +436,8 @@ radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf radeon_set_uconfig_reg(cs, R_031100_SPI_CONFIG_CNTL, spi_config_cntl); } else { /* SPI_CONFIG_CNTL is a protected register on GFX6-GFX8. 
*/ - radeon_set_privileged_config_reg( - cs, R_009100_SPI_CONFIG_CNTL, - S_009100_ENABLE_SQG_TOP_EVENTS(enable) | S_009100_ENABLE_SQG_BOP_EVENTS(enable)); + radeon_set_privileged_config_reg(cs, R_009100_SPI_CONFIG_CNTL, + S_009100_ENABLE_SQG_TOP_EVENTS(enable) | S_009100_ENABLE_SQG_BOP_EVENTS(enable)); } } @@ -478,11 +448,9 @@ radv_emit_inhibit_clockgating(const struct radv_device *device, struct radeon_cm return; /* not needed */ if (device->physical_device->rad_info.gfx_level >= GFX10) { - radeon_set_uconfig_reg(cs, R_037390_RLC_PERFMON_CLK_CNTL, - S_037390_PERFMON_CLOCK_STATE(inhibit)); + radeon_set_uconfig_reg(cs, R_037390_RLC_PERFMON_CLK_CNTL, S_037390_PERFMON_CLOCK_STATE(inhibit)); } else if (device->physical_device->rad_info.gfx_level >= GFX8) { - radeon_set_uconfig_reg(cs, R_0372FC_RLC_PERFMON_CLK_CNTL, - S_0372FC_PERFMON_CLOCK_STATE(inhibit)); + radeon_set_uconfig_reg(cs, R_0372FC_RLC_PERFMON_CLK_CNTL, S_0372FC_PERFMON_CLOCK_STATE(inhibit)); } } @@ -504,10 +472,9 @@ radv_sqtt_init_bo(struct radv_device *device) size += device->sqtt.buffer_size * (uint64_t)max_se; struct radeon_winsys_bo *bo = NULL; - result = ws->buffer_create( - ws, size, 4096, RADEON_DOMAIN_VRAM, - RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM, - RADV_BO_PRIORITY_SCRATCH, 0, &bo); + result = ws->buffer_create(ws, size, 4096, RADEON_DOMAIN_VRAM, + RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM, + RADV_BO_PRIORITY_SCRATCH, 0, &bo); device->sqtt.bo = bo; if (result != VK_SUCCESS) return false; @@ -572,8 +539,7 @@ radv_unregister_queue(struct radv_device *device, struct radv_queue *queue) /* Destroy queue info record. */ simple_mtx_lock(&queue_info->lock); if (queue_info->record_count > 0) { - list_for_each_entry_safe(struct rgp_queue_info_record, record, &queue_info->record, list) - { + list_for_each_entry_safe (struct rgp_queue_info_record, record, &queue_info->record, list) { if (record->queue_id == (uintptr_t)queue) { queue_info->record_count--; list_del(&record->list); @@ -607,8 +573,7 @@ radv_sqtt_init(struct radv_device *device) struct ac_sqtt *sqtt = &device->sqtt; /* Default buffer size set to 32MB per SE. */ - device->sqtt.buffer_size = - (uint32_t)debug_get_num_option("RADV_THREAD_TRACE_BUFFER_SIZE", 32 * 1024 * 1024); + device->sqtt.buffer_size = (uint32_t)debug_get_num_option("RADV_THREAD_TRACE_BUFFER_SIZE", 32 * 1024 * 1024); device->sqtt.start_frame = (int)debug_get_num_option("RADV_THREAD_TRACE", -1); const char *trigger_file = getenv("RADV_THREAD_TRACE_TRIGGER"); @@ -826,9 +791,7 @@ radv_reset_sqtt_trace(struct radv_device *device) /* Clear clock calibration records. 
*/ simple_mtx_lock(&clock_calibration->lock); - list_for_each_entry_safe(struct rgp_clock_calibration_record, record, &clock_calibration->record, - list) - { + list_for_each_entry_safe (struct rgp_clock_calibration_record, record, &clock_calibration->record, list) { clock_calibration->record_count--; list_del(&record->list); free(record); @@ -837,26 +800,23 @@ radv_reset_sqtt_trace(struct radv_device *device) } static VkResult -radv_get_calibrated_timestamps(struct radv_device *device, uint64_t *cpu_timestamp, - uint64_t *gpu_timestamp) +radv_get_calibrated_timestamps(struct radv_device *device, uint64_t *cpu_timestamp, uint64_t *gpu_timestamp) { uint64_t timestamps[2]; uint64_t max_deviation; VkResult result; - const VkCalibratedTimestampInfoEXT timestamp_infos[2] = { - { - .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, - .timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT, - }, - { - .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, - .timeDomain = VK_TIME_DOMAIN_DEVICE_EXT, - } - }; - - result = radv_GetCalibratedTimestampsEXT(radv_device_to_handle(device), 2, timestamp_infos, - timestamps, &max_deviation); + const VkCalibratedTimestampInfoEXT timestamp_infos[2] = {{ + .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, + .timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT, + }, + { + .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, + .timeDomain = VK_TIME_DOMAIN_DEVICE_EXT, + }}; + + result = + radv_GetCalibratedTimestampsEXT(radv_device_to_handle(device), 2, timestamp_infos, timestamps, &max_deviation); if (result != VK_SUCCESS) return result; diff --git a/src/amd/vulkan/radv_video.c b/src/amd/vulkan/radv_video.c index 07016ca..8d45d4a 100644 --- a/src/amd/vulkan/radv_video.c +++ b/src/amd/vulkan/radv_video.c @@ -28,65 +28,52 @@ #include "radv_private.h" #include "vk_video/vulkan_video_codecs_common.h" -#include "ac_vcn_dec.h" #include "ac_uvd_dec.h" +#include "ac_vcn_dec.h" #include "radv_cs.h" #include "radv_debug.h" -#define NUM_H264_REFS 17 -#define NUM_H265_REFS 8 +#define NUM_H264_REFS 17 +#define NUM_H265_REFS 8 #define FB_BUFFER_OFFSET 0x1000 #define FB_BUFFER_SIZE 2048 -#define FB_BUFFER_SIZE_TONGA (2048 * 64) +#define FB_BUFFER_SIZE_TONGA (2048 * 64) #define IT_SCALING_TABLE_SIZE 992 #define RDECODE_SESSION_CONTEXT_SIZE (128 * 1024) /* Not 100% sure this isn't too much but works */ #define VID_DEFAULT_ALIGNMENT 256 -const int vl_zscan_h265_up_right_diagonal_16[] = -{ +const int vl_zscan_h265_up_right_diagonal_16[] = { /* Up-right diagonal scan order for 4x4 blocks - see H.265 section 6.5.3. */ - 0, 4, 1, 8, 5, 2, 12, 9, - 6, 3, 13, 10, 7, 14, 11, 15, + 0, 4, 1, 8, 5, 2, 12, 9, 6, 3, 13, 10, 7, 14, 11, 15, }; -const int vl_zscan_h265_up_right_diagonal[] = -{ +const int vl_zscan_h265_up_right_diagonal[] = { /* Up-right diagonal scan order for 8x8 blocks - see H.265 section 6.5.3. 
*/ - 0, 8, 1, 16, 9, 2, 24, 17, - 10, 3, 32, 25, 18, 11, 4, 40, - 33, 26, 19, 12, 5, 48, 41, 34, - 27, 20, 13, 6, 56, 49, 42, 35, - 28, 21, 14, 7, 57, 50, 43, 36, - 29, 22, 15, 58, 51, 44, 37, 30, - 23, 59, 52, 45, 38, 31, 60, 53, - 46, 39, 61, 54, 47, 62, 55, 63, + 0, 8, 1, 16, 9, 2, 24, 17, 10, 3, 32, 25, 18, 11, 4, 40, 33, 26, 19, 12, 5, 48, + 41, 34, 27, 20, 13, 6, 56, 49, 42, 35, 28, 21, 14, 7, 57, 50, 43, 36, 29, 22, 15, 58, + 51, 44, 37, 30, 23, 59, 52, 45, 38, 31, 60, 53, 46, 39, 61, 54, 47, 62, 55, 63, }; static bool radv_enable_tier2(struct radv_physical_device *pdevice) { - if (pdevice->rad_info.family >= CHIP_NAVI21 && - !(pdevice->instance->debug_flags & RADV_DEBUG_VIDEO_ARRAY_PATH)) + if (pdevice->rad_info.family >= CHIP_NAVI21 && !(pdevice->instance->debug_flags & RADV_DEBUG_VIDEO_ARRAY_PATH)) return true; return false; } static bool -radv_vid_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, - unsigned *out_offset, void **ptr) +radv_vid_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned *out_offset, void **ptr) { - return radv_cmd_buffer_upload_alloc_aligned(cmd_buffer, size, VID_DEFAULT_ALIGNMENT, - out_offset, ptr); + return radv_cmd_buffer_upload_alloc_aligned(cmd_buffer, size, VID_DEFAULT_ALIGNMENT, out_offset, ptr); } /* vcn unified queue (sq) ib header */ static void -radv_vcn_sq_header(struct radeon_cmdbuf *cs, - struct rvcn_sq_var *sq, - bool enc) +radv_vcn_sq_header(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq, bool enc) { /* vcn ib signature */ radeon_emit(cs, RADEON_VCN_SIGNATURE_SIZE); @@ -99,14 +86,12 @@ radv_vcn_sq_header(struct radeon_cmdbuf *cs, /* vcn ib engine info */ radeon_emit(cs, RADEON_VCN_ENGINE_INFO_SIZE); radeon_emit(cs, RADEON_VCN_ENGINE_INFO); - radeon_emit(cs, enc ? RADEON_VCN_ENGINE_TYPE_ENCODE - : RADEON_VCN_ENGINE_TYPE_DECODE); + radeon_emit(cs, enc ? 
RADEON_VCN_ENGINE_TYPE_ENCODE : RADEON_VCN_ENGINE_TYPE_DECODE); radeon_emit(cs, 0); } static void -radv_vcn_sq_tail(struct radeon_cmdbuf *cs, - struct rvcn_sq_var *sq) +radv_vcn_sq_tail(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq) { uint32_t *end; uint32_t size_in_dw; @@ -127,8 +112,8 @@ radv_vcn_sq_tail(struct radeon_cmdbuf *cs, } /* generate an stream handle */ -static -unsigned si_vid_alloc_stream_handle(struct radv_physical_device *pdevice) +static unsigned +si_vid_alloc_stream_handle(struct radv_physical_device *pdevice) { unsigned stream_handle = pdevice->stream_handle_base; @@ -139,8 +124,7 @@ unsigned si_vid_alloc_stream_handle(struct radv_physical_device *pdevice) void radv_init_physical_device_decoder(struct radv_physical_device *pdevice) { - if (pdevice->rad_info.family >= CHIP_GFX1100 || - pdevice->rad_info.family == CHIP_GFX940) + if (pdevice->rad_info.family >= CHIP_GFX1100 || pdevice->rad_info.family == CHIP_GFX940) pdevice->vid_decode_ip = AMD_IP_VCN_UNIFIED; else if (radv_has_uvd(pdevice)) pdevice->vid_decode_ip = AMD_IP_UVD; @@ -214,12 +198,14 @@ radv_init_physical_device_decoder(struct radv_physical_device *pdevice) } } -static bool have_it(struct radv_video_session *vid) +static bool +have_it(struct radv_video_session *vid) { return vid->stream_type == RDECODE_CODEC_H264_PERF || vid->stream_type == RDECODE_CODEC_H265; } -static unsigned calc_ctx_size_h264_perf(struct radv_video_session *vid) +static unsigned +calc_ctx_size_h264_perf(struct radv_video_session *vid) { unsigned width_in_mb, height_in_mb, ctx_size; unsigned width = align(vid->vk.max_coded.width, VL_MACROBLOCK_WIDTH); @@ -236,7 +222,8 @@ static unsigned calc_ctx_size_h264_perf(struct radv_video_session *vid) return ctx_size; } -static unsigned calc_ctx_size_h265_main(struct radv_video_session *vid) +static unsigned +calc_ctx_size_h265_main(struct radv_video_session *vid) { unsigned width = align(vid->vk.max_coded.width, VL_MACROBLOCK_WIDTH); unsigned height = align(vid->vk.max_coded.height, VL_MACROBLOCK_HEIGHT); @@ -253,7 +240,8 @@ static unsigned calc_ctx_size_h265_main(struct radv_video_session *vid) return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024; } -static unsigned calc_ctx_size_h265_main10(struct radv_video_session *vid) +static unsigned +calc_ctx_size_h265_main10(struct radv_video_session *vid) { unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size; @@ -287,10 +275,8 @@ static unsigned calc_ctx_size_h265_main10(struct radv_video_session *vid) } VkResult -radv_CreateVideoSessionKHR(VkDevice _device, - const VkVideoSessionCreateInfoKHR *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkVideoSessionKHR *pVideoSession) +radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkVideoSessionKHR *pVideoSession) { RADV_FROM_HANDLE(radv_device, device, _device); @@ -301,9 +287,7 @@ radv_CreateVideoSessionKHR(VkDevice _device, memset(vid, 0, sizeof(struct radv_video_session)); - VkResult result = vk_video_session_init(&device->vk, - &vid->vk, - pCreateInfo); + VkResult result = vk_video_session_init(&device->vk, &vid->vk, pCreateInfo); if (result != VK_SUCCESS) { vk_free2(&device->vk.alloc, pAllocator, vid); return result; @@ -329,19 +313,18 @@ radv_CreateVideoSessionKHR(VkDevice _device, vid->stream_handle = si_vid_alloc_stream_handle(device->physical_device); 
vid->dbg_frame_cnt = 0; - vid->db_alignment = (device->physical_device->rad_info.family >= CHIP_RENOIR && - vid->vk.max_coded.width > 32 && - (vid->stream_type == RDECODE_CODEC_H265 && - vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)) ? 64 : 32; + vid->db_alignment = + (device->physical_device->rad_info.family >= CHIP_RENOIR && vid->vk.max_coded.width > 32 && + (vid->stream_type == RDECODE_CODEC_H265 && vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)) + ? 64 + : 32; *pVideoSession = radv_video_session_to_handle(vid); return VK_SUCCESS; } void -radv_DestroyVideoSessionKHR(VkDevice _device, - VkVideoSessionKHR _session, - const VkAllocationCallbacks *pAllocator) +radv_DestroyVideoSessionKHR(VkDevice _device, VkVideoSessionKHR _session, const VkAllocationCallbacks *pAllocator) { RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_video_session, vid, _session); @@ -352,10 +335,8 @@ radv_DestroyVideoSessionKHR(VkDevice _device, vk_free2(&device->vk.alloc, pAllocator, vid); } - VkResult -radv_CreateVideoSessionParametersKHR(VkDevice _device, - const VkVideoSessionParametersCreateInfoKHR *pCreateInfo, +radv_CreateVideoSessionParametersKHR(VkDevice _device, const VkVideoSessionParametersCreateInfoKHR *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkVideoSessionParametersKHR *pVideoSessionParameters) { @@ -367,11 +348,8 @@ radv_CreateVideoSessionParametersKHR(VkDevice _device, if (!params) return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - VkResult result = vk_video_session_parameters_init(&device->vk, - ¶ms->vk, - &vid->vk, - templ ? &templ->vk : NULL, - pCreateInfo); + VkResult result = + vk_video_session_parameters_init(&device->vk, ¶ms->vk, &vid->vk, templ ? &templ->vk : NULL, pCreateInfo); if (result != VK_SUCCESS) { vk_free2(&device->vk.alloc, pAllocator, params); return result; @@ -382,8 +360,7 @@ radv_CreateVideoSessionParametersKHR(VkDevice _device, } void -radv_DestroyVideoSessionParametersKHR(VkDevice _device, - VkVideoSessionParametersKHR _params, +radv_DestroyVideoSessionParametersKHR(VkDevice _device, VkVideoSessionParametersKHR _params, const VkAllocationCallbacks *pAllocator) { RADV_FROM_HANDLE(radv_device, device, _device); @@ -394,8 +371,7 @@ radv_DestroyVideoSessionParametersKHR(VkDevice _device, } VkResult -radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, - const VkVideoProfileInfoKHR *pVideoProfile, +radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, const VkVideoProfileInfoKHR *pVideoProfile, VkVideoCapabilitiesKHR *pCapabilities) { RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); @@ -423,8 +399,8 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, pCapabilities->minCodedExtent.width = VL_MACROBLOCK_WIDTH; pCapabilities->minCodedExtent.height = VL_MACROBLOCK_HEIGHT; - struct VkVideoDecodeCapabilitiesKHR *dec_caps = (struct VkVideoDecodeCapabilitiesKHR *) - vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_CAPABILITIES_KHR); + struct VkVideoDecodeCapabilitiesKHR *dec_caps = + (struct VkVideoDecodeCapabilitiesKHR *)vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_CAPABILITIES_KHR); if (dec_caps) dec_caps->flags = VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR; @@ -437,12 +413,11 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, switch (pVideoProfile->videoCodecOperation) { case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: { - struct VkVideoDecodeH264CapabilitiesKHR *ext 
= (struct VkVideoDecodeH264CapabilitiesKHR *) - vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_H264_CAPABILITIES_KHR); + struct VkVideoDecodeH264CapabilitiesKHR *ext = (struct VkVideoDecodeH264CapabilitiesKHR *)vk_find_struct( + pCapabilities->pNext, VIDEO_DECODE_H264_CAPABILITIES_KHR); const struct VkVideoDecodeH264ProfileInfoKHR *h264_profile = - vk_find_struct_const(pVideoProfile->pNext, - VIDEO_DECODE_H264_PROFILE_INFO_KHR); + vk_find_struct_const(pVideoProfile->pNext, VIDEO_DECODE_H264_PROFILE_INFO_KHR); if (h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_BASELINE && h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_MAIN && @@ -466,12 +441,11 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, break; } case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: { - struct VkVideoDecodeH265CapabilitiesKHR *ext = (struct VkVideoDecodeH265CapabilitiesKHR *) - vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_H265_CAPABILITIES_KHR); + struct VkVideoDecodeH265CapabilitiesKHR *ext = (struct VkVideoDecodeH265CapabilitiesKHR *)vk_find_struct( + pCapabilities->pNext, VIDEO_DECODE_H265_CAPABILITIES_KHR); const struct VkVideoDecodeH265ProfileInfoKHR *h265_profile = - vk_find_struct_const(pVideoProfile->pNext, - VIDEO_DECODE_H265_PROFILE_INFO_KHR); + vk_find_struct_const(pVideoProfile->pNext, VIDEO_DECODE_H265_PROFILE_INFO_KHR); if (h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN && h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN_10 && @@ -505,10 +479,10 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, pCapabilities->maxCodedExtent.height = (pdevice->rad_info.family < CHIP_TONGA) ? 1152 : 4096; break; case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: - pCapabilities->maxCodedExtent.width = (pdevice->rad_info.family < CHIP_RENOIR) ? - ((pdevice->rad_info.family < CHIP_TONGA) ? 2048 : 4096) : 8192; - pCapabilities->maxCodedExtent.height = (pdevice->rad_info.family < CHIP_RENOIR) ? - ((pdevice->rad_info.family < CHIP_TONGA) ? 1152 : 4096) : 4352; + pCapabilities->maxCodedExtent.width = + (pdevice->rad_info.family < CHIP_RENOIR) ? ((pdevice->rad_info.family < CHIP_TONGA) ? 2048 : 4096) : 8192; + pCapabilities->maxCodedExtent.height = + (pdevice->rad_info.family < CHIP_RENOIR) ? ((pdevice->rad_info.family < CHIP_TONGA) ? 1152 : 4096) : 4352; break; default: break; @@ -525,19 +499,17 @@ radv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice, VkVideoFormatPropertiesKHR *pVideoFormatProperties) { /* radv requires separate allocates for DPB and decode video. 
*/ - if ((pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | - VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)) == + if ((pVideoFormatInfo->imageUsage & + (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)) == (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)) return VK_ERROR_FORMAT_NOT_SUPPORTED; - VK_OUTARRAY_MAKE_TYPED(VkVideoFormatPropertiesKHR, out, - pVideoFormatProperties, - pVideoFormatPropertyCount); + VK_OUTARRAY_MAKE_TYPED(VkVideoFormatPropertiesKHR, out, pVideoFormatProperties, pVideoFormatPropertyCount); bool need_8bit = true; bool need_10bit = false; - const struct VkVideoProfileListInfoKHR *prof_list = (struct VkVideoProfileListInfoKHR *) - vk_find_struct_const(pVideoFormatInfo->pNext, VIDEO_PROFILE_LIST_INFO_KHR); + const struct VkVideoProfileListInfoKHR *prof_list = + (struct VkVideoProfileListInfoKHR *)vk_find_struct_const(pVideoFormatInfo->pNext, VIDEO_PROFILE_LIST_INFO_KHR); if (prof_list) { for (unsigned i = 0; i < prof_list->profileCount; i++) { const VkVideoProfileInfoKHR *profile = &prof_list->pProfiles[i]; @@ -547,7 +519,8 @@ radv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice, } if (need_10bit) { - vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p) { + vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p) + { p->format = VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16; p->imageType = VK_IMAGE_TYPE_2D; p->imageTiling = VK_IMAGE_TILING_OPTIMAL; @@ -559,12 +532,13 @@ radv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice, } if (need_8bit) { - vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p) { - p->format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM; - p->imageType = VK_IMAGE_TYPE_2D; - p->imageTiling = VK_IMAGE_TILING_OPTIMAL; - p->imageUsageFlags = pVideoFormatInfo->imageUsage; - } + vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p) + { + p->format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM; + p->imageType = VK_IMAGE_TYPE_2D; + p->imageTiling = VK_IMAGE_TILING_OPTIMAL; + p->imageUsageFlags = pVideoFormatInfo->imageUsage; + } } return vk_outarray_status(&out); @@ -574,8 +548,7 @@ radv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice, #define RADV_BIND_DECODER_CTX 1 VkResult -radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, - VkVideoSessionKHR videoSession, +radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR videoSession, uint32_t *pMemoryRequirementsCount, VkVideoSessionMemoryRequirementsKHR *pMemoryRequirements) { @@ -583,13 +556,12 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, RADV_FROM_HANDLE(radv_video_session, vid, videoSession); uint32_t memory_type_bits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1; - VK_OUTARRAY_MAKE_TYPED(VkVideoSessionMemoryRequirementsKHR, out, - pMemoryRequirements, - pMemoryRequirementsCount); + VK_OUTARRAY_MAKE_TYPED(VkVideoSessionMemoryRequirementsKHR, out, pMemoryRequirements, pMemoryRequirementsCount); /* 1 buffer for session context */ if (device->physical_device->rad_info.family >= CHIP_POLARIS10) { - vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m) { + vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m) + { m->memoryBindIndex = RADV_BIND_SESSION_CTX; m->memoryRequirements.size = RDECODE_SESSION_CONTEXT_SIZE; m->memoryRequirements.alignment = 0; @@ -597,9 +569,9 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice 
_device, } } - if (vid->stream_type == RDECODE_CODEC_H264_PERF && - device->physical_device->rad_info.family >= CHIP_POLARIS10) { - vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m) { + if (vid->stream_type == RDECODE_CODEC_H264_PERF && device->physical_device->rad_info.family >= CHIP_POLARIS10) { + vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m) + { m->memoryBindIndex = RADV_BIND_DECODER_CTX; m->memoryRequirements.size = align(calc_ctx_size_h264_perf(vid), 4096); m->memoryRequirements.alignment = 0; @@ -613,7 +585,8 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, ctx_size = calc_ctx_size_h265_main10(vid); else ctx_size = calc_ctx_size_h265_main(vid); - vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m) { + vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m) + { m->memoryBindIndex = RADV_BIND_DECODER_CTX; m->memoryRequirements.size = align(ctx_size, 4096); m->memoryRequirements.alignment = 0; @@ -624,8 +597,7 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, } VkResult -radv_UpdateVideoSessionParametersKHR(VkDevice _device, - VkVideoSessionParametersKHR videoSessionParameters, +radv_UpdateVideoSessionParametersKHR(VkDevice _device, VkVideoSessionParametersKHR videoSessionParameters, const VkVideoSessionParametersUpdateInfoKHR *pUpdateInfo) { RADV_FROM_HANDLE(radv_video_session_params, params, videoSessionParameters); @@ -634,8 +606,7 @@ radv_UpdateVideoSessionParametersKHR(VkDevice _device, } static void -copy_bind(struct radv_vid_mem *dst, - const VkBindVideoSessionMemoryInfoKHR *src) +copy_bind(struct radv_vid_mem *dst, const VkBindVideoSessionMemoryInfoKHR *src) { dst->mem = radv_device_memory_from_handle(src->memory); dst->offset = src->memoryOffset; @@ -643,9 +614,7 @@ copy_bind(struct radv_vid_mem *dst, } VkResult -radv_BindVideoSessionMemoryKHR(VkDevice _device, - VkVideoSessionKHR videoSession, - uint32_t videoSessionBindMemoryCount, +radv_BindVideoSessionMemoryKHR(VkDevice _device, VkVideoSessionKHR videoSession, uint32_t videoSessionBindMemoryCount, const VkBindVideoSessionMemoryInfoKHR *pBindSessionMemoryInfos) { RADV_FROM_HANDLE(radv_video_session, vid, videoSession); @@ -667,15 +636,16 @@ radv_BindVideoSessionMemoryKHR(VkDevice _device, } /* add a new set register command to the IB */ -static void set_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, uint32_t val) +static void +set_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, uint32_t val) { struct radeon_cmdbuf *cs = cmd_buffer->cs; radeon_emit(cs, RDECODE_PKT0(reg >> 2, 0)); radeon_emit(cs, val); } -static void send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd, - struct radeon_winsys_bo *bo, uint32_t offset) +static void +send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd, struct radeon_winsys_bo *bo, uint32_t offset) { struct radv_physical_device *pdev = cmd_buffer->device->physical_device; uint64_t addr; @@ -691,7 +661,7 @@ static void send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd, set_reg(cmd_buffer, pdev->vid_dec_reg.cmd, cmd << 1); return; } - switch(cmd) { + switch (cmd) { case RDECODE_CMD_MSG_BUFFER: cmd_buffer->video.decode_buffer->valid_buf_flag |= RDECODE_CMDBUF_FLAGS_MSG_BUFFER; cmd_buffer->video.decode_buffer->msg_buffer_address_hi = (addr >> 32); @@ -742,8 +712,8 @@ static void send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd, } } -static void rvcn_dec_message_create(struct radv_video_session *vid, - void *ptr, uint32_t size) +static void 
+rvcn_dec_message_create(struct radv_video_session *vid, void *ptr, uint32_t size) { rvcn_dec_message_header_t *header = ptr; rvcn_dec_message_create_t *create = (void *)((char *)ptr + sizeof(rvcn_dec_message_header_t)); @@ -767,7 +737,8 @@ static void rvcn_dec_message_create(struct radv_video_session *vid, create->height_in_samples = vid->vk.max_coded.height; } -static void rvcn_dec_message_feedback(void *ptr) +static void +rvcn_dec_message_feedback(void *ptr) { rvcn_dec_feedback_header_t *header = (void *)ptr; @@ -776,22 +747,18 @@ static void rvcn_dec_message_feedback(void *ptr) header->num_buffers = 0; } -static const uint8_t h264_levels[] = { 10, 11, 12, 13, 20, 21, 22, - 30, 31, 32, 40, 41, 42, - 50, 51, 52, 60, 61, 62 }; -static uint8_t get_h264_level(StdVideoH264LevelIdc level) +static const uint8_t h264_levels[] = {10, 11, 12, 13, 20, 21, 22, 30, 31, 32, 40, 41, 42, 50, 51, 52, 60, 61, 62}; +static uint8_t +get_h264_level(StdVideoH264LevelIdc level) { - assert (level <= STD_VIDEO_H264_LEVEL_IDC_6_2); + assert(level <= STD_VIDEO_H264_LEVEL_IDC_6_2); return h264_levels[level]; } -static rvcn_dec_message_avc_t get_h264_msg(struct radv_video_session *vid, - struct radv_video_session_params *params, - const struct VkVideoDecodeInfoKHR *frame_info, - uint32_t *slice_offset, - uint32_t *width_in_samples, - uint32_t *height_in_samples, - void *it_ptr) +static rvcn_dec_message_avc_t +get_h264_msg(struct radv_video_session *vid, struct radv_video_session_params *params, + const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *slice_offset, uint32_t *width_in_samples, + uint32_t *height_in_samples, void *it_ptr) { rvcn_dec_message_avc_t result; const struct VkVideoDecodeH264PictureInfoKHR *h264_pic_info = @@ -802,7 +769,8 @@ static rvcn_dec_message_avc_t get_h264_msg(struct radv_video_session *vid, memset(&result, 0, sizeof(result)); assert(params->vk.h264_dec.std_sps_count > 0); - const StdVideoH264SequenceParameterSet *sps = vk_video_find_h264_dec_std_sps(¶ms->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id); + const StdVideoH264SequenceParameterSet *sps = + vk_video_find_h264_dec_std_sps(¶ms->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id); switch (sps->profile_idc) { case STD_VIDEO_H264_PROFILE_IDC_BASELINE: result.profile = RDECODE_H264_PROFILE_BASELINE; @@ -815,7 +783,7 @@ static rvcn_dec_message_avc_t get_h264_msg(struct radv_video_session *vid, break; default: fprintf(stderr, "UNSUPPORTED CODEC %d\n", sps->profile_idc); - result.profile= RDECODE_H264_PROFILE_MAIN; + result.profile = RDECODE_H264_PROFILE_MAIN; break; } @@ -842,7 +810,8 @@ static rvcn_dec_message_avc_t get_h264_msg(struct radv_video_session *vid, result.chroma_format = sps->chroma_format_idc; - const StdVideoH264PictureParameterSet *pps = vk_video_find_h264_dec_std_pps(¶ms->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id); + const StdVideoH264PictureParameterSet *pps = + vk_video_find_h264_dec_std_pps(¶ms->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id); result.pps_info_flags = 0; result.pps_info_flags |= pps->flags.transform_8x8_mode_flag << 0; result.pps_info_flags |= pps->flags.redundant_pic_cnt_present_flag << 1; @@ -866,8 +835,8 @@ static rvcn_dec_message_avc_t get_h264_msg(struct radv_video_session *vid, memcpy(result.scaling_list_8x8[0], sps->pScalingLists->ScalingList8x8[0], 64); memcpy(result.scaling_list_8x8[1], sps->pScalingLists->ScalingList8x8[3], 64); } else { - memset(result.scaling_list_4x4, 0x10, 6*16); - memset(result.scaling_list_8x8, 0x10, 2*64); + 
memset(result.scaling_list_4x4, 0x10, 6 * 16); + memset(result.scaling_list_8x8, 0x10, 2 * 64); } memset(it_ptr, 0, IT_SCALING_TABLE_SIZE); @@ -904,15 +873,13 @@ static rvcn_dec_message_avc_t get_h264_msg(struct radv_video_session *vid, if (dpb_slot->pStdReferenceInfo->flags.bottom_field_flag) result.used_for_reference_flags |= (1 << (2 * i + 1)); - if (!dpb_slot->pStdReferenceInfo->flags.top_field_flag && - !dpb_slot->pStdReferenceInfo->flags.bottom_field_flag) + if (!dpb_slot->pStdReferenceInfo->flags.top_field_flag && !dpb_slot->pStdReferenceInfo->flags.bottom_field_flag) result.used_for_reference_flags |= (3 << (2 * i)); if (dpb_slot->pStdReferenceInfo->flags.used_for_long_term_reference) - result.ref_frame_list[i] |= 0x80; + result.ref_frame_list[i] |= 0x80; if (dpb_slot->pStdReferenceInfo->flags.is_non_existing) result.non_existing_frame_flags |= 1 << i; - } result.curr_pic_ref_frame_num = frame_info->referenceSlotCount; result.decoded_pic_idx = frame_info->pSetupReferenceSlot->slotIndex; @@ -920,17 +887,21 @@ static rvcn_dec_message_avc_t get_h264_msg(struct radv_video_session *vid, return result; } -static void update_h265_scaling(void *it_ptr, - const StdVideoH265ScalingLists *scaling_lists) +static void +update_h265_scaling(void *it_ptr, const StdVideoH265ScalingLists *scaling_lists) { - uint8_t ScalingList4x4[STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS][STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS] = { 0 }; - uint8_t ScalingList8x8[STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS][STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS] = { 0 }; - uint8_t ScalingList16x16[STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS][STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS] = { 0 }; - uint8_t ScalingList32x32[STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS][STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS] = { 0 }; + uint8_t ScalingList4x4[STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS][STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS] = { + 0}; + uint8_t ScalingList8x8[STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS][STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS] = { + 0}; + uint8_t ScalingList16x16[STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS] + [STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS] = {0}; + uint8_t ScalingList32x32[STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS] + [STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS] = {0}; int i, j; if (scaling_lists) { - for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS; i++) { + for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS; i++) { for (j = 0; j < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS; j++) ScalingList4x4[i][j] = scaling_lists->ScalingList4x4[i][vl_zscan_h265_up_right_diagonal_16[j]]; for (j = 0; j < STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS; j++) { @@ -942,17 +913,19 @@ static void update_h265_scaling(void *it_ptr, } } - memcpy(it_ptr, ScalingList4x4, STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS); - memcpy((char *)it_ptr + 96, ScalingList8x8, STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS); - memcpy((char *)it_ptr + 480, ScalingList16x16, STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS); - memcpy((char *)it_ptr + 864, ScalingList32x32, STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS); + memcpy(it_ptr, ScalingList4x4, + STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS); + memcpy((char *)it_ptr + 96, ScalingList8x8, + 
STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS); + memcpy((char *)it_ptr + 480, ScalingList16x16, + STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS); + memcpy((char *)it_ptr + 864, ScalingList32x32, + STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS); } -static rvcn_dec_message_hevc_t get_h265_msg(struct radv_device *device, - struct radv_video_session *vid, - struct radv_video_session_params *params, - const struct VkVideoDecodeInfoKHR *frame_info, - void *it_ptr) +static rvcn_dec_message_hevc_t +get_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params, + const struct VkVideoDecodeInfoKHR *frame_info, void *it_ptr) { rvcn_dec_message_hevc_t result; int i, j; @@ -960,8 +933,10 @@ static rvcn_dec_message_hevc_t get_h265_msg(struct radv_device *device, vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H265_PICTURE_INFO_KHR); memset(&result, 0, sizeof(result)); - const StdVideoH265SequenceParameterSet *sps = vk_video_find_h265_dec_std_sps(¶ms->vk, h265_pic_info->pStdPictureInfo->sps_video_parameter_set_id); - const StdVideoH265PictureParameterSet *pps = vk_video_find_h265_dec_std_pps(¶ms->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id); + const StdVideoH265SequenceParameterSet *sps = + vk_video_find_h265_dec_std_sps(¶ms->vk, h265_pic_info->pStdPictureInfo->sps_video_parameter_set_id); + const StdVideoH265PictureParameterSet *pps = + vk_video_find_h265_dec_std_pps(¶ms->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id); result.sps_info_flags = 0; result.sps_info_flags |= sps->flags.scaling_list_enabled_flag << 0; @@ -986,15 +961,12 @@ static rvcn_dec_message_hevc_t get_h265_msg(struct radv_device *device, result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8; result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8; result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4; - result.sps_max_dec_pic_buffering_minus1 = sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1]; - result.log2_min_luma_coding_block_size_minus3 = - sps->log2_min_luma_coding_block_size_minus3; - result.log2_diff_max_min_luma_coding_block_size = - sps->log2_diff_max_min_luma_coding_block_size; - result.log2_min_transform_block_size_minus2 = - sps->log2_min_luma_transform_block_size_minus2; - result.log2_diff_max_min_transform_block_size = - sps->log2_diff_max_min_luma_transform_block_size; + result.sps_max_dec_pic_buffering_minus1 = + sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1]; + result.log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3; + result.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size; + result.log2_min_transform_block_size_minus2 = sps->log2_min_luma_transform_block_size_minus2; + result.log2_diff_max_min_transform_block_size = sps->log2_diff_max_min_luma_transform_block_size; result.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter; result.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra; if (sps->flags.pcm_enabled_flag) { @@ -1074,7 +1046,6 @@ static rvcn_dec_message_hevc_t get_h265_msg(struct radv_device *device, for (i = 0; i < 8; ++i) result.ref_pic_set_lt_curr[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetLtCurr[i]); - const StdVideoH265ScalingLists 
*scaling_lists = NULL; if (pps->flags.pps_scaling_list_data_present_flag) scaling_lists = pps->pScalingLists; @@ -1093,7 +1064,7 @@ static rvcn_dec_message_hevc_t get_h265_msg(struct radv_device *device, for (i = 0; i < 2; i++) { for (j = 0; j < 15; j++) - result.direct_reflist[i][j] = 0xff;//pic->RefPicList[i][j]; + result.direct_reflist[i][j] = 0xff; // pic->RefPicList[i][j]; } if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10) { @@ -1112,13 +1083,10 @@ static rvcn_dec_message_hevc_t get_h265_msg(struct radv_device *device, return result; } -static bool rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, - struct radv_video_session *vid, - struct radv_video_session_params *params, - void *ptr, - void *it_ptr, - uint32_t *slice_offset, - const struct VkVideoDecodeInfoKHR *frame_info) +static bool +rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_session *vid, + struct radv_video_session_params *params, void *ptr, void *it_ptr, uint32_t *slice_offset, + const struct VkVideoDecodeInfoKHR *frame_info) { struct radv_device *device = cmd_buffer->device; rvcn_dec_message_header_t *header; @@ -1132,7 +1100,8 @@ static bool rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_image *img = dst_iv->image; struct radv_image_plane *luma = &img->planes[0]; struct radv_image_plane *chroma = &img->planes[1]; - struct radv_image_view *dpb_iv = radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding); + struct radv_image_view *dpb_iv = + radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding); struct radv_image *dpb = dpb_iv->image; header = ptr; @@ -1147,12 +1116,12 @@ static bool rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, } offset_decode = sizes; - decode = (void *)((char*)header + sizes); + decode = (void *)((char *)header + sizes); sizes += sizeof(rvcn_dec_message_decode_t); if (vid->dpb_type == DPB_DYNAMIC_TIER_2) { offset_dynamic_dpb = sizes; - dynamic_dpb_t2 = (void*)((char *)header + sizes); + dynamic_dpb_t2 = (void *)((char *)header + sizes); sizes += sizeof(rvcn_dec_message_dynamic_dpb_t2_t); } @@ -1195,8 +1164,7 @@ static bool rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, decode->dpb_size = (vid->dpb_type != DPB_DYNAMIC_TIER_2) ? 
dpb->size : 0; - decode->dt_size = dst_iv->image->planes[0].surface.total_size + - dst_iv->image->planes[1].surface.total_size; + decode->dt_size = dst_iv->image->planes[0].surface.total_size + dst_iv->image->planes[1].surface.total_size; decode->sct_size = 0; decode->sc_coeff_size = 0; @@ -1226,10 +1194,8 @@ static bool rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, decode->dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset; if (decode->dt_field_mode) { - decode->dt_luma_bottom_offset = - luma->surface.u.gfx9.surf_offset + luma->surface.u.gfx9.surf_slice_size; - decode->dt_chroma_bottom_offset = - chroma->surface.u.gfx9.surf_offset + chroma->surface.u.gfx9.surf_slice_size; + decode->dt_luma_bottom_offset = luma->surface.u.gfx9.surf_offset + luma->surface.u.gfx9.surf_slice_size; + decode->dt_chroma_bottom_offset = chroma->surface.u.gfx9.surf_offset + chroma->surface.u.gfx9.surf_slice_size; } else { decode->dt_luma_bottom_offset = decode->dt_luma_top_offset; decode->dt_chroma_bottom_offset = decode->dt_chroma_top_offset; @@ -1238,7 +1204,8 @@ static bool rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, *slice_offset = 0; switch (vid->vk.op) { case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: { - rvcn_dec_message_avc_t avc = get_h264_msg(vid, params, frame_info, slice_offset, &decode->width_in_samples, &decode->height_in_samples, it_ptr); + rvcn_dec_message_avc_t avc = get_h264_msg(vid, params, frame_info, slice_offset, &decode->width_in_samples, + &decode->height_in_samples, it_ptr); memcpy(codec, (void *)&avc, sizeof(rvcn_dec_message_avc_t)); index_codec->message_id = RDECODE_MESSAGE_AVC; break; @@ -1260,7 +1227,8 @@ static bool rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, uint64_t addr; for (int i = 0; i < frame_info->referenceSlotCount; i++) { - struct radv_image_view *f_dpb_iv = radv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding); + struct radv_image_view *f_dpb_iv = + radv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding); struct radv_image *dpb_img = f_dpb_iv->image; radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo); @@ -1290,13 +1258,10 @@ static bool rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, return true; } -static struct ruvd_h264 get_uvd_h264_msg(struct radv_video_session *vid, - struct radv_video_session_params *params, - const struct VkVideoDecodeInfoKHR *frame_info, - uint32_t *slice_offset, - uint32_t *width_in_samples, - uint32_t *height_in_samples, - void *it_ptr) +static struct ruvd_h264 +get_uvd_h264_msg(struct radv_video_session *vid, struct radv_video_session_params *params, + const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *slice_offset, uint32_t *width_in_samples, + uint32_t *height_in_samples, void *it_ptr) { struct ruvd_h264 result; const struct VkVideoDecodeH264PictureInfoKHR *h264_pic_info = @@ -1306,7 +1271,8 @@ static struct ruvd_h264 get_uvd_h264_msg(struct radv_video_session *vid, memset(&result, 0, sizeof(result)); - const StdVideoH264SequenceParameterSet *sps = vk_video_find_h264_dec_std_sps(¶ms->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id); + const StdVideoH264SequenceParameterSet *sps = + vk_video_find_h264_dec_std_sps(¶ms->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id); switch (sps->profile_idc) { case STD_VIDEO_H264_PROFILE_IDC_BASELINE: result.profile = RUVD_H264_PROFILE_BASELINE; @@ -1345,7 +1311,8 @@ static struct ruvd_h264 
get_uvd_h264_msg(struct radv_video_session *vid, result.chroma_format = sps->chroma_format_idc; - const StdVideoH264PictureParameterSet *pps = vk_video_find_h264_dec_std_pps(¶ms->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id); + const StdVideoH264PictureParameterSet *pps = + vk_video_find_h264_dec_std_pps(¶ms->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id); result.pps_info_flags = 0; result.pps_info_flags |= pps->flags.transform_8x8_mode_flag << 0; result.pps_info_flags |= pps->flags.redundant_pic_cnt_present_flag << 1; @@ -1369,8 +1336,8 @@ static struct ruvd_h264 get_uvd_h264_msg(struct radv_video_session *vid, memcpy(result.scaling_list_8x8[0], sps->pScalingLists->ScalingList8x8[0], 64); memcpy(result.scaling_list_8x8[1], sps->pScalingLists->ScalingList8x8[3], 64); } else { - memset(result.scaling_list_4x4, 0x10, 6*16); - memset(result.scaling_list_8x8, 0x10, 2*64); + memset(result.scaling_list_4x4, 0x10, 6 * 16); + memset(result.scaling_list_8x8, 0x10, 2 * 64); } memset(it_ptr, 0, IT_SCALING_TABLE_SIZE); @@ -1400,7 +1367,7 @@ static struct ruvd_h264 get_uvd_h264_msg(struct radv_video_session *vid, result.ref_frame_list[i] = idx; if (dpb_slot->pStdReferenceInfo->flags.used_for_long_term_reference) - result.ref_frame_list[i] |= 0x80; + result.ref_frame_list[i] |= 0x80; } result.curr_pic_ref_frame_num = frame_info->referenceSlotCount; result.decoded_pic_idx = frame_info->pSetupReferenceSlot->slotIndex; @@ -1408,11 +1375,9 @@ static struct ruvd_h264 get_uvd_h264_msg(struct radv_video_session *vid, return result; } -static struct ruvd_h265 get_uvd_h265_msg(struct radv_device *device, - struct radv_video_session *vid, - struct radv_video_session_params *params, - const struct VkVideoDecodeInfoKHR *frame_info, - void *it_ptr) +static struct ruvd_h265 +get_uvd_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params, + const struct VkVideoDecodeInfoKHR *frame_info, void *it_ptr) { struct ruvd_h265 result; int i, j; @@ -1421,8 +1386,10 @@ static struct ruvd_h265 get_uvd_h265_msg(struct radv_device *device, memset(&result, 0, sizeof(result)); - const StdVideoH265SequenceParameterSet *sps = vk_video_find_h265_dec_std_sps(¶ms->vk, h265_pic_info->pStdPictureInfo->sps_video_parameter_set_id); - const StdVideoH265PictureParameterSet *pps = vk_video_find_h265_dec_std_pps(¶ms->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id); + const StdVideoH265SequenceParameterSet *sps = + vk_video_find_h265_dec_std_sps(¶ms->vk, h265_pic_info->pStdPictureInfo->sps_video_parameter_set_id); + const StdVideoH265PictureParameterSet *pps = + vk_video_find_h265_dec_std_pps(¶ms->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id); result.sps_info_flags = 0; result.sps_info_flags |= sps->flags.scaling_list_enabled_flag << 0; @@ -1442,24 +1409,19 @@ static struct ruvd_h265 get_uvd_h265_msg(struct radv_device *device, result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8; result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8; result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4; - result.sps_max_dec_pic_buffering_minus1 = sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1]; - result.log2_min_luma_coding_block_size_minus3 = - sps->log2_min_luma_coding_block_size_minus3; - result.log2_diff_max_min_luma_coding_block_size = - sps->log2_diff_max_min_luma_coding_block_size; - result.log2_min_transform_block_size_minus2 = - 
sps->log2_min_luma_transform_block_size_minus2; - result.log2_diff_max_min_transform_block_size = - sps->log2_diff_max_min_luma_transform_block_size; + result.sps_max_dec_pic_buffering_minus1 = + sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1]; + result.log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3; + result.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size; + result.log2_min_transform_block_size_minus2 = sps->log2_min_luma_transform_block_size_minus2; + result.log2_diff_max_min_transform_block_size = sps->log2_diff_max_min_luma_transform_block_size; result.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter; result.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra; if (sps->flags.pcm_enabled_flag) { result.pcm_sample_bit_depth_luma_minus1 = sps->pcm_sample_bit_depth_luma_minus1; result.pcm_sample_bit_depth_chroma_minus1 = sps->pcm_sample_bit_depth_chroma_minus1; - result.log2_min_pcm_luma_coding_block_size_minus3 = - sps->log2_min_pcm_luma_coding_block_size_minus3; - result.log2_diff_max_min_pcm_luma_coding_block_size = - sps->log2_diff_max_min_pcm_luma_coding_block_size; + result.log2_min_pcm_luma_coding_block_size_minus3 = sps->log2_min_pcm_luma_coding_block_size_minus3; + result.log2_diff_max_min_pcm_luma_coding_block_size = sps->log2_diff_max_min_pcm_luma_coding_block_size; } result.num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets; @@ -1549,7 +1511,7 @@ static struct ruvd_h265 get_uvd_h265_msg(struct radv_device *device, for (i = 0; i < 2; i++) { for (j = 0; j < 15; j++) - result.direct_reflist[i][j] = 0xff;//pic->RefPicList[i][j]; + result.direct_reflist[i][j] = 0xff; // pic->RefPicList[i][j]; } if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10) { @@ -1568,26 +1530,25 @@ static struct ruvd_h265 get_uvd_h265_msg(struct radv_device *device, return result; } -static unsigned texture_offset_legacy(struct radeon_surf *surface, unsigned layer) +static unsigned +texture_offset_legacy(struct radeon_surf *surface, unsigned layer) { return (uint64_t)surface->u.legacy.level[0].offset_256B * 256 + - layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4; + layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4; } -static bool ruvd_dec_message_decode(struct radv_device *device, - struct radv_video_session *vid, - struct radv_video_session_params *params, - void *ptr, - void *it_ptr, - uint32_t *slice_offset, - const struct VkVideoDecodeInfoKHR *frame_info) +static bool +ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *vid, + struct radv_video_session_params *params, void *ptr, void *it_ptr, uint32_t *slice_offset, + const struct VkVideoDecodeInfoKHR *frame_info) { struct ruvd_msg *msg = ptr; struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding); struct radv_image *img = dst_iv->image; struct radv_image_plane *luma = &img->planes[0]; struct radv_image_plane *chroma = &img->planes[1]; - struct radv_image_view *dpb_iv = radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding); + struct radv_image_view *dpb_iv = + radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding); struct radv_image *dpb = dpb_iv->image; memset(msg, 0, sizeof(struct ruvd_msg)); @@ -1605,18 +1566,15 @@ static bool ruvd_dec_message_decode(struct radv_device 
*device, msg->body.decode.bsd_size = frame_info->srcBufferRange; msg->body.decode.db_pitch = align(frame_info->dstPictureResource.codedExtent.width, vid->db_alignment); - if (vid->stream_type == RUVD_CODEC_H264_PERF && - device->physical_device->rad_info.family >= CHIP_POLARIS10) + if (vid->stream_type == RUVD_CODEC_H264_PERF && device->physical_device->rad_info.family >= CHIP_POLARIS10) msg->body.decode.dpb_reserved = vid->ctx.size; *slice_offset = 0; switch (vid->vk.op) { case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: { - msg->body.decode.codec.h264 = get_uvd_h264_msg(vid, params, frame_info, - slice_offset, - &msg->body.decode.width_in_samples, - &msg->body.decode.height_in_samples, - it_ptr); + msg->body.decode.codec.h264 = + get_uvd_h264_msg(vid, params, frame_info, slice_offset, &msg->body.decode.width_in_samples, + &msg->body.decode.height_in_samples, it_ptr); break; } case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: { @@ -1701,8 +1659,8 @@ static bool ruvd_dec_message_decode(struct radv_device *device, return true; } -static void ruvd_dec_message_create(struct radv_video_session *vid, - void *ptr) +static void +ruvd_dec_message_create(struct radv_video_session *vid, void *ptr) { struct ruvd_msg *msg = ptr; @@ -1716,8 +1674,7 @@ static void ruvd_dec_message_create(struct radv_video_session *vid, } void -radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer, - const VkVideoBeginCodingInfoKHR *pBeginInfo) +radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoBeginCodingInfoKHR *pBeginInfo) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_video_session, vid, pBeginInfo->videoSession); @@ -1729,19 +1686,15 @@ radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer, if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED) { radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 256); radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, false); - rvcn_decode_ib_package_t *ib_header = - (rvcn_decode_ib_package_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]); - ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) + - sizeof(struct rvcn_decode_ib_package_s); + rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]); + ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) + sizeof(struct rvcn_decode_ib_package_s); cmd_buffer->cs->cdw++; ib_header->package_type = (RDECODE_IB_PARAM_DECODE_BUFFER); cmd_buffer->cs->cdw++; - cmd_buffer->video.decode_buffer = - (rvcn_decode_buffer_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]); + cmd_buffer->video.decode_buffer = (rvcn_decode_buffer_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]); cmd_buffer->cs->cdw += sizeof(struct rvcn_decode_buffer_s) / 4; memset(cmd_buffer->video.decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s)); } - } static void @@ -1752,8 +1705,7 @@ radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer) void *ptr; uint32_t out_offset; - radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, - &ptr); + radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr); rvcn_dec_message_create(vid, ptr, size); send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset); @@ -1774,8 +1726,7 @@ radv_uvd_cmd_reset(struct radv_cmd_buffer *cmd_buffer) uint32_t size = sizeof(struct ruvd_msg); void *ptr; uint32_t out_offset; - radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, - &ptr); + 
radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr); ruvd_dec_message_create(vid, ptr); if (vid->sessionctx.mem) @@ -1788,8 +1739,7 @@ radv_uvd_cmd_reset(struct radv_cmd_buffer *cmd_buffer) } void -radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer, - const VkVideoCodingControlInfoKHR *pCodingControlInfo) +radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoCodingControlInfoKHR *pCodingControlInfo) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); if (pCodingControlInfo->flags & VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR) { @@ -1801,8 +1751,7 @@ radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer, } void -radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer, - const VkVideoEndCodingInfoKHR *pEndCodingInfo) +radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoEndCodingInfoKHR *pEndCodingInfo) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); @@ -1813,8 +1762,7 @@ radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer, } static void -radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, - const VkVideoDecodeInfoKHR *frame_info) +radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info) { RADV_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer); struct radv_video_session *vid = cmd_buffer->video.vid; @@ -1823,19 +1771,17 @@ radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, void *ptr, *fb_ptr, *it_ptr = NULL; uint32_t out_offset, fb_offset, it_offset = 0; struct radeon_winsys_bo *msg_bo, *fb_bo, *it_bo = NULL; - unsigned fb_size = (cmd_buffer->device->physical_device->rad_info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE; + unsigned fb_size = + (cmd_buffer->device->physical_device->rad_info.family == CHIP_TONGA) ? 
FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE; - radv_vid_buffer_upload_alloc(cmd_buffer, fb_size, &fb_offset, - &fb_ptr); + radv_vid_buffer_upload_alloc(cmd_buffer, fb_size, &fb_offset, &fb_ptr); fb_bo = cmd_buffer->upload.upload_bo; if (have_it(vid)) { - radv_vid_buffer_upload_alloc(cmd_buffer, IT_SCALING_TABLE_SIZE, &it_offset, - &it_ptr); + radv_vid_buffer_upload_alloc(cmd_buffer, IT_SCALING_TABLE_SIZE, &it_offset, &it_ptr); it_bo = cmd_buffer->upload.upload_bo; } - radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, - &ptr); + radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr); msg_bo = cmd_buffer->upload.upload_bo; uint32_t slice_offset; @@ -1846,7 +1792,8 @@ radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, msg_bo, out_offset); if (vid->dpb_type != DPB_DYNAMIC_TIER_2) { - struct radv_image_view *dpb_iv = radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding); + struct radv_image_view *dpb_iv = + radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding); struct radv_image *dpb = dpb_iv->image; send_cmd(cmd_buffer, RDECODE_CMD_DPB_BUFFER, dpb->bindings[0].bo, dpb->bindings[0].offset); } @@ -1854,7 +1801,8 @@ radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, if (vid->ctx.mem) send_cmd(cmd_buffer, RDECODE_CMD_CONTEXT_BUFFER, vid->ctx.mem->bo, vid->ctx.offset); - send_cmd(cmd_buffer, RDECODE_CMD_BITSTREAM_BUFFER, src_buffer->bo, src_buffer->offset + frame_info->srcBufferOffset + slice_offset); + send_cmd(cmd_buffer, RDECODE_CMD_BITSTREAM_BUFFER, src_buffer->bo, + src_buffer->offset + frame_info->srcBufferOffset + slice_offset); struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding); struct radv_image *img = dst_iv->image; @@ -1868,8 +1816,7 @@ radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, } static void -radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, - const VkVideoDecodeInfoKHR *frame_info) +radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info) { RADV_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer); struct radv_video_session *vid = cmd_buffer->video.vid; @@ -1885,7 +1832,7 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, size += sizeof(rvcn_dec_message_index_t); size += sizeof(rvcn_dec_message_dynamic_dpb_t2_t); } - size += sizeof(rvcn_dec_message_decode_t); /* decode */ + size += sizeof(rvcn_dec_message_decode_t); /* decode */ switch (vid->vk.op) { case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: size += sizeof(rvcn_dec_message_avc_t); @@ -1897,17 +1844,14 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, unreachable("unsupported codec."); } - radv_vid_buffer_upload_alloc(cmd_buffer, FB_BUFFER_SIZE, &fb_offset, - &fb_ptr); + radv_vid_buffer_upload_alloc(cmd_buffer, FB_BUFFER_SIZE, &fb_offset, &fb_ptr); fb_bo = cmd_buffer->upload.upload_bo; if (have_it(vid)) { - radv_vid_buffer_upload_alloc(cmd_buffer, IT_SCALING_TABLE_SIZE, &it_offset, - &it_ptr); + radv_vid_buffer_upload_alloc(cmd_buffer, IT_SCALING_TABLE_SIZE, &it_offset, &it_ptr); it_bo = cmd_buffer->upload.upload_bo; } - radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, - &ptr); + radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr); msg_bo = cmd_buffer->upload.upload_bo; uint32_t slice_offset; @@ -1917,7 +1861,8 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, send_cmd(cmd_buffer, 
RDECODE_CMD_MSG_BUFFER, msg_bo, out_offset); if (vid->dpb_type != DPB_DYNAMIC_TIER_2) { - struct radv_image_view *dpb_iv = radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding); + struct radv_image_view *dpb_iv = + radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding); struct radv_image *dpb = dpb_iv->image; send_cmd(cmd_buffer, RDECODE_CMD_DPB_BUFFER, dpb->bindings[0].bo, dpb->bindings[0].offset); } @@ -1925,7 +1870,8 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, if (vid->ctx.mem) send_cmd(cmd_buffer, RDECODE_CMD_CONTEXT_BUFFER, vid->ctx.mem->bo, vid->ctx.offset); - send_cmd(cmd_buffer, RDECODE_CMD_BITSTREAM_BUFFER, src_buffer->bo, src_buffer->offset + frame_info->srcBufferOffset + slice_offset); + send_cmd(cmd_buffer, RDECODE_CMD_BITSTREAM_BUFFER, src_buffer->bo, + src_buffer->offset + frame_info->srcBufferOffset + slice_offset); struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding); struct radv_image *img = dst_iv->image; @@ -1941,8 +1887,7 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, } void -radv_CmdDecodeVideoKHR(VkCommandBuffer commandBuffer, - const VkVideoDecodeInfoKHR *frame_info) +radv_CmdDecodeVideoKHR(VkCommandBuffer commandBuffer, const VkVideoDecodeInfoKHR *frame_info) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c index 597a158..f8c339b 100644 --- a/src/amd/vulkan/radv_wsi.c +++ b/src/amd/vulkan/radv_wsi.c @@ -23,9 +23,9 @@ * IN THE SOFTWARE. */ +#include "meta/radv_meta.h" #include "util/macros.h" #include "radv_debug.h" -#include "meta/radv_meta.h" #include "radv_private.h" #include "vk_fence.h" #include "vk_semaphore.h" @@ -68,8 +68,8 @@ radv_wsi_get_prime_blit_queue(VkDevice _device) .queueCount = 1, }; - device->private_sdma_queue = vk_zalloc(&device->vk.alloc, sizeof(struct radv_queue), 8, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + device->private_sdma_queue = + vk_zalloc(&device->vk.alloc, sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); VkResult result = radv_queue_init(device, device->private_sdma_queue, 0, &queue_create, NULL); if (result == VK_SUCCESS) { @@ -90,9 +90,9 @@ VkResult radv_init_wsi(struct radv_physical_device *physical_device) { VkResult result = - wsi_device_init(&physical_device->wsi_device, radv_physical_device_to_handle(physical_device), - radv_wsi_proc_addr, &physical_device->instance->vk.alloc, - physical_device->master_fd, &physical_device->instance->dri_options, &(struct wsi_device_options){.sw_device = false}); + wsi_device_init(&physical_device->wsi_device, radv_physical_device_to_handle(physical_device), radv_wsi_proc_addr, + &physical_device->instance->vk.alloc, physical_device->master_fd, + &physical_device->instance->dri_options, &(struct wsi_device_options){.sw_device = false}); if (result != VK_SUCCESS) return result; diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index 8db543d..285c0d5 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -33,39 +33,37 @@ #include "sid.h" static void -si_write_harvested_raster_configs(struct radv_physical_device *physical_device, - struct radeon_cmdbuf *cs, unsigned raster_config, - unsigned raster_config_1) +si_write_harvested_raster_configs(struct radv_physical_device *physical_device, struct radeon_cmdbuf *cs, + unsigned raster_config, unsigned raster_config_1) { unsigned 
num_se = MAX2(physical_device->rad_info.max_se, 1); unsigned raster_config_se[4]; unsigned se; - ac_get_harvested_configs(&physical_device->rad_info, raster_config, &raster_config_1, - raster_config_se); + ac_get_harvested_configs(&physical_device->rad_info, raster_config, &raster_config_1, raster_config_se); for (se = 0; se < num_se; se++) { /* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */ if (physical_device->rad_info.gfx_level < GFX7) - radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX, - S_00802C_SE_INDEX(se) | S_00802C_SH_BROADCAST_WRITES(1) | - S_00802C_INSTANCE_BROADCAST_WRITES(1)); + radeon_set_config_reg( + cs, R_00802C_GRBM_GFX_INDEX, + S_00802C_SE_INDEX(se) | S_00802C_SH_BROADCAST_WRITES(1) | S_00802C_INSTANCE_BROADCAST_WRITES(1)); else - radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, - S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) | - S_030800_INSTANCE_BROADCAST_WRITES(1)); + radeon_set_uconfig_reg( + cs, R_030800_GRBM_GFX_INDEX, + S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1)); radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]); } /* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */ if (physical_device->rad_info.gfx_level < GFX7) - radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX, - S_00802C_SE_BROADCAST_WRITES(1) | S_00802C_SH_BROADCAST_WRITES(1) | - S_00802C_INSTANCE_BROADCAST_WRITES(1)); + radeon_set_config_reg( + cs, R_00802C_GRBM_GFX_INDEX, + S_00802C_SE_BROADCAST_WRITES(1) | S_00802C_SH_BROADCAST_WRITES(1) | S_00802C_INSTANCE_BROADCAST_WRITES(1)); else - radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, - S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | - S_030800_INSTANCE_BROADCAST_WRITES(1)); + radeon_set_uconfig_reg( + cs, R_030800_GRBM_GFX_INDEX, + S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1)); if (physical_device->rad_info.gfx_level >= GFX7) radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); @@ -81,8 +79,7 @@ si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs) radeon_emit(cs, 0); radeon_emit(cs, 0); - radeon_set_sh_reg(cs, R_00B834_COMPUTE_PGM_HI, - S_00B834_DATA(device->physical_device->rad_info.address32_hi >> 8)); + radeon_set_sh_reg(cs, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(device->physical_device->rad_info.address32_hi >> 8)); radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2); /* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1, @@ -105,8 +102,7 @@ si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs) } } - if (device->physical_device->rad_info.gfx_level >= GFX9 && - device->physical_device->rad_info.gfx_level < GFX11) { + if (device->physical_device->rad_info.gfx_level >= GFX9 && device->physical_device->rad_info.gfx_level < GFX11) { radeon_set_uconfig_reg(cs, R_0301EC_CP_COHER_START_DELAY, device->physical_device->rad_info.gfx_level >= GFX10 ? 
0x20 : 0); } @@ -241,8 +237,7 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) if (!has_clear_state && physical_device->rad_info.gfx_level < GFX11) radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0); if (physical_device->rad_info.gfx_level < GFX7) - radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, - S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1)); + radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1)); if (!has_clear_state) radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0); @@ -252,17 +247,13 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) */ if (physical_device->rad_info.gfx_level <= GFX7 || !has_clear_state) { radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); - radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, - S_028204_WINDOW_OFFSET_DISABLE(1)); - radeon_set_context_reg(cs, R_028240_PA_SC_GENERIC_SCISSOR_TL, - S_028240_WINDOW_OFFSET_DISABLE(1)); - radeon_set_context_reg( - cs, R_028244_PA_SC_GENERIC_SCISSOR_BR, - S_028244_BR_X(MAX_FRAMEBUFFER_WIDTH) | S_028244_BR_Y(MAX_FRAMEBUFFER_HEIGHT)); + radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); + radeon_set_context_reg(cs, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); + radeon_set_context_reg(cs, R_028244_PA_SC_GENERIC_SCISSOR_BR, + S_028244_BR_X(MAX_FRAMEBUFFER_WIDTH) | S_028244_BR_Y(MAX_FRAMEBUFFER_HEIGHT)); radeon_set_context_reg(cs, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); - radeon_set_context_reg( - cs, R_028034_PA_SC_SCREEN_SCISSOR_BR, - S_028034_BR_X(MAX_FRAMEBUFFER_WIDTH) | S_028034_BR_Y(MAX_FRAMEBUFFER_HEIGHT)); + radeon_set_context_reg(cs, R_028034_PA_SC_SCREEN_SCISSOR_BR, + S_028034_BR_X(MAX_FRAMEBUFFER_WIDTH) | S_028034_BR_Y(MAX_FRAMEBUFFER_HEIGHT)); } if (!has_clear_state) { @@ -283,9 +274,9 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0x0); } - radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, - S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | - S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE)); + radeon_set_context_reg( + cs, R_02800C_DB_RENDER_OVERRIDE, + S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE)); if (physical_device->rad_info.gfx_level >= GFX10) { radeon_set_context_reg(cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL, 0); @@ -296,9 +287,8 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) radeon_set_uconfig_reg(cs, R_030988_GE_USER_VGPR_EN, 0); if (physical_device->rad_info.gfx_level < GFX11) { - radeon_set_context_reg( - cs, R_028038_DB_DFSM_CONTROL, - S_028038_PUNCHOUT_MODE(V_028038_FORCE_OFF) | S_028038_POPS_DRAIN_PS_ON_OVERLAP(1)); + radeon_set_context_reg(cs, R_028038_DB_DFSM_CONTROL, + S_028038_PUNCHOUT_MODE(V_028038_FORCE_OFF) | S_028038_POPS_DRAIN_PS_ON_OVERLAP(1)); } } else if (physical_device->rad_info.gfx_level == GFX9) { radeon_set_uconfig_reg(cs, R_030920_VGT_MAX_VTX_INDX, ~0); @@ -306,8 +296,7 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) radeon_set_uconfig_reg(cs, R_030928_VGT_INDX_OFFSET, 0); radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL, - S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) | - S_028060_POPS_DRAIN_PS_ON_OVERLAP(1)); + S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) | S_028060_POPS_DRAIN_PS_ON_OVERLAP(1)); } else { /* These registers, when written, also overwrite 
the * CLEAR_STATE context, so we can't rely on CLEAR_STATE setting @@ -354,38 +343,32 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) cu_mask_ps = u_bit_consecutive(0, physical_device->rad_info.min_good_cu_per_sa); if (physical_device->rad_info.gfx_level >= GFX7) { - if (physical_device->rad_info.gfx_level >= GFX10 && - physical_device->rad_info.gfx_level < GFX11) { + if (physical_device->rad_info.gfx_level >= GFX10 && physical_device->rad_info.gfx_level < GFX11) { /* Logical CUs 16 - 31 */ radeon_set_sh_reg_idx(physical_device, cs, R_00B104_SPI_SHADER_PGM_RSRC4_VS, 3, - ac_apply_cu_en(S_00B104_CU_EN(0xffff), - C_00B104_CU_EN, 16, &physical_device->rad_info)); + ac_apply_cu_en(S_00B104_CU_EN(0xffff), C_00B104_CU_EN, 16, &physical_device->rad_info)); } if (physical_device->rad_info.gfx_level >= GFX10) { radeon_set_sh_reg_idx(physical_device, cs, R_00B404_SPI_SHADER_PGM_RSRC4_HS, 3, - ac_apply_cu_en(S_00B404_CU_EN(0xffff), - C_00B404_CU_EN, 16, &physical_device->rad_info)); - radeon_set_sh_reg_idx(physical_device, cs, R_00B004_SPI_SHADER_PGM_RSRC4_PS, 3, - ac_apply_cu_en(S_00B004_CU_EN(cu_mask_ps >> 16), - C_00B004_CU_EN, 16, &physical_device->rad_info)); + ac_apply_cu_en(S_00B404_CU_EN(0xffff), C_00B404_CU_EN, 16, &physical_device->rad_info)); + radeon_set_sh_reg_idx( + physical_device, cs, R_00B004_SPI_SHADER_PGM_RSRC4_PS, 3, + ac_apply_cu_en(S_00B004_CU_EN(cu_mask_ps >> 16), C_00B004_CU_EN, 16, &physical_device->rad_info)); } if (physical_device->rad_info.gfx_level >= GFX9) { radeon_set_sh_reg_idx(physical_device, cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 3, - ac_apply_cu_en(S_00B41C_CU_EN(0xffff) | - S_00B41C_WAVE_LIMIT(0x3F), - C_00B41C_CU_EN, 0, &physical_device->rad_info)); + ac_apply_cu_en(S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F), C_00B41C_CU_EN, 0, + &physical_device->rad_info)); } else { radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, - ac_apply_cu_en(S_00B51C_CU_EN(0xffff) | - S_00B51C_WAVE_LIMIT(0x3F), - C_00B51C_CU_EN, 0, &physical_device->rad_info)); + ac_apply_cu_en(S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F), C_00B51C_CU_EN, 0, + &physical_device->rad_info)); radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F)); radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, - ac_apply_cu_en(S_00B31C_CU_EN(0xffff) | - S_00B31C_WAVE_LIMIT(0x3F), - C_00B31C_CU_EN, 0, &physical_device->rad_info)); + ac_apply_cu_en(S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F), C_00B31C_CU_EN, 0, + &physical_device->rad_info)); /* If this is 0, Bonaire can hang even if GS isn't being used. * Other chips are unaffected. These are suboptimal values, * but we don't use on-chip GS. @@ -395,9 +378,8 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) } radeon_set_sh_reg_idx(physical_device, cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 3, - ac_apply_cu_en(S_00B01C_CU_EN(cu_mask_ps) | - S_00B01C_WAVE_LIMIT(0x3F) | - S_00B01C_LDS_GROUP_SIZE(physical_device->rad_info.gfx_level >= GFX11), + ac_apply_cu_en(S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F) | + S_00B01C_LDS_GROUP_SIZE(physical_device->rad_info.gfx_level >= GFX11), C_00B01C_CU_EN, 0, &physical_device->rad_info)); } @@ -412,8 +394,7 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) * a single primitive shader subgroup. */ uint32_t max_deallocs_in_wave = physical_device->rad_info.gfx_level >= GFX11 ? 
16 : 512; - radeon_set_context_reg(cs, R_028C50_PA_SC_NGG_MODE_CNTL, - S_028C50_MAX_DEALLOCS_IN_WAVE(max_deallocs_in_wave)); + radeon_set_context_reg(cs, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(max_deallocs_in_wave)); if (physical_device->rad_info.gfx_level < GFX11) radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); @@ -424,14 +405,12 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) */ unsigned vertex_reuse_depth = physical_device->rad_info.gfx_level >= GFX10_3 ? 30 : 0; radeon_set_context_reg(cs, R_028838_PA_CL_NGG_CNTL, - S_028838_INDEX_BUF_EDGE_FLAG_ENA(0) | - S_028838_VERTEX_REUSE_DEPTH(vertex_reuse_depth)); + S_028838_INDEX_BUF_EDGE_FLAG_ENA(0) | S_028838_VERTEX_REUSE_DEPTH(vertex_reuse_depth)); /* Enable CMASK/FMASK/HTILE/DCC caching in L2 for small chips. */ unsigned meta_write_policy, meta_read_policy; - unsigned no_alloc = device->physical_device->rad_info.gfx_level >= GFX11 - ? V_02807C_CACHE_NOA_GFX11 - : V_02807C_CACHE_NOA_GFX10; + unsigned no_alloc = + device->physical_device->rad_info.gfx_level >= GFX11 ? V_02807C_CACHE_NOA_GFX11 : V_02807C_CACHE_NOA_GFX10; /* TODO: investigate whether LRU improves performance on other chips too */ if (physical_device->rad_info.max_render_backends <= 4) { @@ -442,12 +421,11 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) meta_read_policy = no_alloc; /* don't cache reads */ } - radeon_set_context_reg( - cs, R_02807C_DB_RMI_L2_CACHE_CONTROL, - S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM) | S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM) | - S_02807C_HTILE_WR_POLICY(meta_write_policy) | - S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM) | S_02807C_Z_RD_POLICY(no_alloc) | - S_02807C_S_RD_POLICY(no_alloc) | S_02807C_HTILE_RD_POLICY(meta_read_policy)); + radeon_set_context_reg(cs, R_02807C_DB_RMI_L2_CACHE_CONTROL, + S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM) | S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM) | + S_02807C_HTILE_WR_POLICY(meta_write_policy) | + S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM) | S_02807C_Z_RD_POLICY(no_alloc) | + S_02807C_S_RD_POLICY(no_alloc) | S_02807C_HTILE_RD_POLICY(meta_read_policy)); uint32_t gl2_cc; if (device->physical_device->rad_info.gfx_level >= GFX11) { @@ -455,17 +433,14 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) S_028410_COLOR_WR_POLICY_GFX11(V_028410_CACHE_STREAM) | S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_GFX11); } else { - gl2_cc = S_028410_CMASK_WR_POLICY(meta_write_policy) | - S_028410_FMASK_WR_POLICY(V_028410_CACHE_STREAM) | + gl2_cc = S_028410_CMASK_WR_POLICY(meta_write_policy) | S_028410_FMASK_WR_POLICY(V_028410_CACHE_STREAM) | S_028410_DCC_WR_POLICY_GFX10(meta_write_policy) | - S_028410_COLOR_WR_POLICY_GFX10(V_028410_CACHE_STREAM) | - S_028410_CMASK_RD_POLICY(meta_read_policy) | + S_028410_COLOR_WR_POLICY_GFX10(V_028410_CACHE_STREAM) | S_028410_CMASK_RD_POLICY(meta_read_policy) | S_028410_FMASK_RD_POLICY(V_028410_CACHE_NOA_GFX10) | S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_GFX10); } - radeon_set_context_reg(cs, R_028410_CB_RMI_GL2_CACHE_CONTROL, - gl2_cc | S_028410_DCC_RD_POLICY(meta_read_policy)); + radeon_set_context_reg(cs, R_028410_CB_RMI_GL2_CACHE_CONTROL, gl2_cc | S_028410_DCC_RD_POLICY(meta_read_policy)); radeon_set_context_reg(cs, R_028428_CB_COVERAGE_OUT_CONTROL, 0); radeon_set_sh_reg_seq(cs, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 4); @@ -502,31 +477,27 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) if (physical_device->rad_info.gfx_level >= 
GFX10_3) { radeon_set_context_reg(cs, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff); /* This allows sample shading. */ - radeon_set_context_reg( - cs, R_028848_PA_CL_VRS_CNTL, - S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE)); + radeon_set_context_reg(cs, R_028848_PA_CL_VRS_CNTL, + S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE)); } } if (physical_device->rad_info.gfx_level >= GFX11) { /* ACCUM fields changed their meaning. */ radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION, - S_028B50_ACCUM_ISOLINE(128) | S_028B50_ACCUM_TRI(128) | - S_028B50_ACCUM_QUAD(128) | S_028B50_DONUT_SPLIT_GFX9(24) | - S_028B50_TRAP_SPLIT(6)); + S_028B50_ACCUM_ISOLINE(128) | S_028B50_ACCUM_TRI(128) | S_028B50_ACCUM_QUAD(128) | + S_028B50_DONUT_SPLIT_GFX9(24) | S_028B50_TRAP_SPLIT(6)); } else if (physical_device->rad_info.gfx_level >= GFX9) { radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION, - S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) | - S_028B50_ACCUM_QUAD(24) | S_028B50_DONUT_SPLIT_GFX9(24) | - S_028B50_TRAP_SPLIT(6)); + S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) | S_028B50_ACCUM_QUAD(24) | + S_028B50_DONUT_SPLIT_GFX9(24) | S_028B50_TRAP_SPLIT(6)); } else if (physical_device->rad_info.gfx_level >= GFX8) { uint32_t vgt_tess_distribution; - vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | S_028B50_ACCUM_TRI(11) | - S_028B50_ACCUM_QUAD(11) | S_028B50_DONUT_SPLIT_GFX81(16); + vgt_tess_distribution = + S_028B50_ACCUM_ISOLINE(32) | S_028B50_ACCUM_TRI(11) | S_028B50_ACCUM_QUAD(11) | S_028B50_DONUT_SPLIT_GFX81(16); - if (physical_device->rad_info.family == CHIP_FIJI || - physical_device->rad_info.family >= CHIP_POLARIS10) + if (physical_device->rad_info.family == CHIP_FIJI || physical_device->rad_info.family >= CHIP_POLARIS10) vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3); radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution); @@ -540,8 +511,7 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) radeon_set_context_reg(cs, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); if (physical_device->rad_info.gfx_level >= GFX7) { - radeon_set_context_reg(cs, R_028084_TA_BC_BASE_ADDR_HI, - S_028084_ADDRESS(border_color_va >> 40)); + radeon_set_context_reg(cs, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(border_color_va >> 40)); } } @@ -553,21 +523,18 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) } if (physical_device->rad_info.gfx_level >= GFX9) { - radeon_set_context_reg( - cs, R_028C48_PA_SC_BINNER_CNTL_1, - S_028C48_MAX_ALLOC_COUNT(physical_device->rad_info.pbb_max_alloc_count - 1) | - S_028C48_MAX_PRIM_PER_BATCH(1023)); - radeon_set_context_reg(cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, - S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1)); + radeon_set_context_reg(cs, R_028C48_PA_SC_BINNER_CNTL_1, + S_028C48_MAX_ALLOC_COUNT(physical_device->rad_info.pbb_max_alloc_count - 1) | + S_028C48_MAX_PRIM_PER_BATCH(1023)); + radeon_set_context_reg(cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1)); radeon_set_uconfig_reg(cs, R_030968_VGT_INSTANCE_BASE_ID, 0); } unsigned tmp = (unsigned)(1.0 * 8.0); - radeon_set_context_reg(cs, R_028A00_PA_SU_POINT_SIZE, - S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); - radeon_set_context_reg(cs, R_028A04_PA_SU_POINT_MINMAX, - S_028A04_MIN_SIZE(radv_pack_float_12p4(0)) | - S_028A04_MAX_SIZE(radv_pack_float_12p4(8191.875 / 2))); + radeon_set_context_reg(cs, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | 
S_028A00_WIDTH(tmp)); + radeon_set_context_reg( + cs, R_028A04_PA_SU_POINT_MINMAX, + S_028A04_MIN_SIZE(radv_pack_float_12p4(0)) | S_028A04_MAX_SIZE(radv_pack_float_12p4(8191.875 / 2))); if (!has_clear_state) { radeon_set_context_reg(cs, R_028004_DB_COUNT_CONTROL, S_028004_ZPASS_INCREMENT_DISABLE(1)); @@ -587,24 +554,22 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, small_prim_filter_cntl); } - radeon_set_context_reg( - cs, R_0286D4_SPI_INTERP_CONTROL_0, - S_0286D4_FLAT_SHADE_ENA(1) | S_0286D4_PNT_SPRITE_ENA(1) | - S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | - S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | - S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | - S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | - S_0286D4_PNT_SPRITE_TOP_1(0)); /* vulkan is top to bottom - 1.0 at bottom */ + radeon_set_context_reg(cs, R_0286D4_SPI_INTERP_CONTROL_0, + S_0286D4_FLAT_SHADE_ENA(1) | S_0286D4_PNT_SPRITE_ENA(1) | + S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | + S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | + S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | + S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | + S_0286D4_PNT_SPRITE_TOP_1(0)); /* vulkan is top to bottom - 1.0 at bottom */ radeon_set_context_reg(cs, R_028BE4_PA_SU_VTX_CNTL, S_028BE4_PIX_CENTER(1) | S_028BE4_ROUND_MODE(V_028BE4_X_ROUND_TO_EVEN) | S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); radeon_set_context_reg(cs, R_028818_PA_CL_VTE_CNTL, - S_028818_VTX_W0_FMT(1) | S_028818_VPORT_X_SCALE_ENA(1) | - S_028818_VPORT_X_OFFSET_ENA(1) | S_028818_VPORT_Y_SCALE_ENA(1) | - S_028818_VPORT_Y_OFFSET_ENA(1) | S_028818_VPORT_Z_SCALE_ENA(1) | - S_028818_VPORT_Z_OFFSET_ENA(1)); + S_028818_VTX_W0_FMT(1) | S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) | + S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) | + S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1)); if (device->tma_bo) { uint64_t tba_va, tma_va; @@ -614,9 +579,8 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) tba_va = radv_shader_get_va(device->trap_handler_shader); tma_va = radv_buffer_get_va(device->tma_bo); - uint32_t regs[] = {R_00B000_SPI_SHADER_TBA_LO_PS, R_00B100_SPI_SHADER_TBA_LO_VS, - R_00B200_SPI_SHADER_TBA_LO_GS, R_00B300_SPI_SHADER_TBA_LO_ES, - R_00B400_SPI_SHADER_TBA_LO_HS, R_00B500_SPI_SHADER_TBA_LO_LS}; + uint32_t regs[] = {R_00B000_SPI_SHADER_TBA_LO_PS, R_00B100_SPI_SHADER_TBA_LO_VS, R_00B200_SPI_SHADER_TBA_LO_GS, + R_00B300_SPI_SHADER_TBA_LO_ES, R_00B400_SPI_SHADER_TBA_LO_HS, R_00B500_SPI_SHADER_TBA_LO_LS}; for (i = 0; i < ARRAY_SIZE(regs); ++i) { radeon_set_sh_reg_seq(cs, regs[i], 4); @@ -629,16 +593,14 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) if (physical_device->rad_info.gfx_level >= GFX11) { radeon_set_context_reg(cs, R_028C54_PA_SC_BINNER_CNTL_2, 0); - radeon_set_context_reg(cs, R_028620_PA_RATE_CNTL, - S_028620_VERTEX_RATE(2) | S_028620_PRIM_RATE(1)); + radeon_set_context_reg(cs, R_028620_PA_RATE_CNTL, S_028620_VERTEX_RATE(2) | S_028620_PRIM_RATE(1)); uint64_t rb_mask = BITFIELD64_MASK(physical_device->rad_info.max_render_backends); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_CONTROL) | EVENT_INDEX(1)); - radeon_emit(cs, PIXEL_PIPE_STATE_CNTL_COUNTER_ID(0) | - PIXEL_PIPE_STATE_CNTL_STRIDE(2) | - 
PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_LO(rb_mask)); + radeon_emit(cs, PIXEL_PIPE_STATE_CNTL_COUNTER_ID(0) | PIXEL_PIPE_STATE_CNTL_STRIDE(2) | + PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_LO(rb_mask)); radeon_emit(cs, PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_HI(rb_mask)); radeon_set_uconfig_reg(cs, R_031110_SPI_GS_THROTTLE_CNTL1, 0x12355123); @@ -651,8 +613,7 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) */ bool exclusion = physical_device->rad_info.gfx_level >= GFX7; radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, - S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | - S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion)); + S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion)); si_emit_compute(device, cs); } @@ -675,11 +636,10 @@ cik_create_gfx_config(struct radv_device *device) radeon_emit(cs, PKT3_NOP_PAD); } - VkResult result = - device->ws->buffer_create(device->ws, cs->cdw * 4, 4096, device->ws->cs_domain(device->ws), - RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC, - RADV_BO_PRIORITY_CS, 0, &device->gfx_init); + VkResult result = device->ws->buffer_create( + device->ws, cs->cdw * 4, 4096, device->ws->cs_domain(device->ws), + RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC, + RADV_BO_PRIORITY_CS, 0, &device->gfx_init); if (result != VK_SUCCESS) goto fail; @@ -738,16 +698,13 @@ si_intersect_scissor(const VkRect2D *a, const VkRect2D *b) VkRect2D ret; ret.offset.x = MAX2(a->offset.x, b->offset.x); ret.offset.y = MAX2(a->offset.y, b->offset.y); - ret.extent.width = - MIN2(a->offset.x + a->extent.width, b->offset.x + b->extent.width) - ret.offset.x; - ret.extent.height = - MIN2(a->offset.y + a->extent.height, b->offset.y + b->extent.height) - ret.offset.y; + ret.extent.width = MIN2(a->offset.x + a->extent.width, b->offset.x + b->extent.width) - ret.offset.x; + ret.extent.height = MIN2(a->offset.y + a->extent.height, b->offset.y + b->extent.height) - ret.offset.y; return ret; } void -si_write_scissors(struct radeon_cmdbuf *cs, int count, const VkRect2D *scissors, - const VkViewport *viewports) +si_write_scissors(struct radeon_cmdbuf *cs, int count, const VkRect2D *scissors, const VkViewport *viewports) { int i; @@ -759,21 +716,19 @@ si_write_scissors(struct radeon_cmdbuf *cs, int count, const VkRect2D *scissors, VkRect2D viewport_scissor = si_scissor_from_viewport(viewports + i); VkRect2D scissor = si_intersect_scissor(&scissors[i], &viewport_scissor); - radeon_emit(cs, S_028250_TL_X(scissor.offset.x) | S_028250_TL_Y(scissor.offset.y) | - S_028250_WINDOW_OFFSET_DISABLE(1)); + radeon_emit( + cs, S_028250_TL_X(scissor.offset.x) | S_028250_TL_Y(scissor.offset.y) | S_028250_WINDOW_OFFSET_DISABLE(1)); radeon_emit(cs, S_028254_BR_X(scissor.offset.x + scissor.extent.width) | S_028254_BR_Y(scissor.offset.y + scissor.extent.height)); } } void -si_write_guardband(struct radeon_cmdbuf *cs, int count, const VkViewport *viewports, - unsigned rast_prim, unsigned polygon_mode, float line_width) +si_write_guardband(struct radeon_cmdbuf *cs, int count, const VkViewport *viewports, unsigned rast_prim, + unsigned polygon_mode, float line_width) { - const bool draw_points = - radv_rast_prim_is_point(rast_prim) || radv_polygon_mode_is_point(polygon_mode); - const bool draw_lines = - radv_rast_prim_is_line(rast_prim) || radv_polygon_mode_is_line(polygon_mode); + const bool draw_points = radv_rast_prim_is_point(rast_prim) || 
radv_polygon_mode_is_point(polygon_mode); + const bool draw_lines = radv_rast_prim_is_line(rast_prim) || radv_polygon_mode_is_line(polygon_mode); int i; float scale[3], translate[3], guardband_x = INFINITY, guardband_y = INFINITY; float discard_x = 1.0f, discard_y = 1.0f; @@ -849,10 +804,9 @@ static const struct radv_prim_vertex_count prim_size_table[] = { }; uint32_t -si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw, - bool indirect_draw, bool count_from_stream_output, - uint32_t draw_vertex_count, unsigned topology, bool prim_restart_enable, - unsigned patch_control_points, unsigned num_tess_patches) +si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw, bool indirect_draw, + bool count_from_stream_output, uint32_t draw_vertex_count, unsigned topology, + bool prim_restart_enable, unsigned patch_control_points, unsigned num_tess_patches) { const struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info; const unsigned max_primgroup_in_wave = 2; @@ -902,12 +856,10 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_dra /* WD_SWITCH_ON_EOP has no effect on GPUs with less than * 4 shader engines. Set 1 to pass the assertion below. * The other cases are hardware requirements. */ - if (info->max_se < 4 || topology == V_008958_DI_PT_POLYGON || - topology == V_008958_DI_PT_LINELOOP || topology == V_008958_DI_PT_TRIFAN || - topology == V_008958_DI_PT_TRISTRIP_ADJ || - (prim_restart_enable && - (info->family < CHIP_POLARIS10 || - (topology != V_008958_DI_PT_POINTLIST && topology != V_008958_DI_PT_LINESTRIP)))) + if (info->max_se < 4 || topology == V_008958_DI_PT_POLYGON || topology == V_008958_DI_PT_LINELOOP || + topology == V_008958_DI_PT_TRIFAN || topology == V_008958_DI_PT_TRISTRIP_ADJ || + (prim_restart_enable && (info->family < CHIP_POLARIS10 || + (topology != V_008958_DI_PT_POINTLIST && topology != V_008958_DI_PT_LINESTRIP)))) wd_switch_on_eop = true; /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0. @@ -939,8 +891,7 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_dra (info->family == CHIP_HAWAII || (info->gfx_level == GFX8 && /* max primgroup in wave is always 2 - leave this for documentation */ - (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_GEOMETRY) || - max_primgroup_in_wave != 2)))) + (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_GEOMETRY) || max_primgroup_in_wave != 2)))) partial_vs_wave = true; /* Instancing bug on Bonaire. */ @@ -974,28 +925,24 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_dra /* Workaround for a VGT hang when strip primitive types are used with * primitive restart. 
*/ - if (prim_restart_enable && - (topology == V_008958_DI_PT_LINESTRIP || topology == V_008958_DI_PT_TRISTRIP || - topology == V_008958_DI_PT_LINESTRIP_ADJ || topology == V_008958_DI_PT_TRISTRIP_ADJ)) { + if (prim_restart_enable && (topology == V_008958_DI_PT_LINESTRIP || topology == V_008958_DI_PT_TRISTRIP || + topology == V_008958_DI_PT_LINESTRIP_ADJ || topology == V_008958_DI_PT_TRISTRIP_ADJ)) { partial_vs_wave = true; } return cmd_buffer->state.ia_multi_vgt_param.base | S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) | S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) | S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) | - S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) | - S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) | + S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) | S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) | S_028AA8_WD_SWITCH_ON_EOP(info->gfx_level >= GFX7 ? wd_switch_on_eop : 0); } void -si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, bool is_mec, - unsigned event, unsigned event_flags, unsigned dst_sel, - unsigned data_sel, uint64_t va, uint32_t new_fence, +si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, bool is_mec, unsigned event, + unsigned event_flags, unsigned dst_sel, unsigned data_sel, uint64_t va, uint32_t new_fence, uint64_t gfx9_eop_bug_va) { - unsigned op = EVENT_TYPE(event) | - EVENT_INDEX(event == V_028A90_CS_DONE || event == V_028A90_PS_DONE ? 6 : 5) | - event_flags; + unsigned op = + EVENT_TYPE(event) | EVENT_INDEX(event == V_028A90_CS_DONE || event == V_028A90_PS_DONE ? 6 : 5) | event_flags; unsigned is_gfx8_mec = is_mec && gfx_level < GFX9; unsigned sel = EOP_DST_SEL(dst_sel) | EOP_DATA_SEL(data_sel); @@ -1032,8 +979,7 @@ si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_leve * queue. 
*/ if (event == V_028B9C_CS_DONE || event == V_028B9C_PS_DONE) { - assert(event_flags == 0 && dst_sel == EOP_DST_SEL_MEM && - data_sel == EOP_DATA_SEL_VALUE_32BIT); + assert(event_flags == 0 && dst_sel == EOP_DST_SEL_MEM && data_sel == EOP_DATA_SEL_VALUE_32BIT); if (is_mec) { radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 5, false)); @@ -1077,8 +1023,7 @@ si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_leve void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask) { - assert(op == WAIT_REG_MEM_EQUAL || op == WAIT_REG_MEM_NOT_EQUAL || - op == WAIT_REG_MEM_GREATER_OR_EQUAL); + assert(op == WAIT_REG_MEM_EQUAL || op == WAIT_REG_MEM_NOT_EQUAL || op == WAIT_REG_MEM_GREATER_OR_EQUAL); radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, false)); radeon_emit(cs, op | WAIT_REG_MEM_MEM_SPACE(1)); @@ -1112,10 +1057,9 @@ si_emit_acquire_mem(struct radeon_cmdbuf *cs, bool is_mec, bool is_gfx9, unsigne } static void -gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, - uint32_t *flush_cnt, uint64_t flush_va, bool is_mec, - enum radv_cmd_flush_bits flush_bits, enum rgp_flush_bits *sqtt_flush_bits, - uint64_t gfx9_eop_bug_va) +gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, uint32_t *flush_cnt, + uint64_t flush_va, bool is_mec, enum radv_cmd_flush_bits flush_bits, + enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va) { uint32_t gcr_cntl = 0; unsigned cb_db_event = 0; @@ -1231,17 +1175,15 @@ gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level unsigned gl2_wb = G_586_GL2_WB(gcr_cntl); unsigned gcr_seq = G_586_SEQ(gcr_cntl); - gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV & C_586_GLK_WB & C_586_GLK_INV & - C_586_GLV_INV & C_586_GL1_INV & C_586_GL2_INV & C_586_GL2_WB; /* keep SEQ */ + gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV & C_586_GLK_WB & C_586_GLK_INV & C_586_GLV_INV & C_586_GL1_INV & + C_586_GL2_INV & C_586_GL2_WB; /* keep SEQ */ /* Send an event that flushes caches. */ radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 6, 0)); - radeon_emit(cs, S_490_EVENT_TYPE(cb_db_event) | - S_490_EVENT_INDEX(5) | - S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) | S_490_GLV_INV(glv_inv) | - S_490_GL1_INV(gl1_inv) | S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) | - S_490_SEQ(gcr_seq) | S_490_GLK_WB(glk_wb) | S_490_GLK_INV(glk_inv) | - S_490_PWS_ENABLE(1)); + radeon_emit(cs, S_490_EVENT_TYPE(cb_db_event) | S_490_EVENT_INDEX(5) | S_490_GLM_WB(glm_wb) | + S_490_GLM_INV(glm_inv) | S_490_GLV_INV(glv_inv) | S_490_GL1_INV(gl1_inv) | + S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) | S_490_SEQ(gcr_seq) | S_490_GLK_WB(glk_wb) | + S_490_GLK_INV(glk_inv) | S_490_PWS_ENABLE(1)); radeon_emit(cs, 0); /* DST_SEL, INT_SEL, DATA_SEL */ radeon_emit(cs, 0); /* ADDRESS_LO */ radeon_emit(cs, 0); /* ADDRESS_HI */ @@ -1251,14 +1193,12 @@ gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level /* Wait for the event and invalidate remaining caches if needed. 
*/ radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 6, 0)); - radeon_emit(cs, S_580_PWS_STAGE_SEL(V_580_CP_PFP) | - S_580_PWS_COUNTER_SEL(V_580_TS_SELECT) | - S_580_PWS_ENA2(1) | - S_580_PWS_COUNT(0)); + radeon_emit(cs, S_580_PWS_STAGE_SEL(V_580_CP_PFP) | S_580_PWS_COUNTER_SEL(V_580_TS_SELECT) | + S_580_PWS_ENA2(1) | S_580_PWS_COUNT(0)); radeon_emit(cs, 0xffffffff); /* GCR_SIZE */ radeon_emit(cs, 0x01ffffff); /* GCR_SIZE_HI */ - radeon_emit(cs, 0); /* GCR_BASE_LO */ - radeon_emit(cs, 0); /* GCR_BASE_HI */ + radeon_emit(cs, 0); /* GCR_BASE_LO */ + radeon_emit(cs, 0); /* GCR_BASE_HI */ radeon_emit(cs, S_585_PWS_ENA(1)); radeon_emit(cs, gcr_cntl); /* GCR_CNTL */ @@ -1284,18 +1224,17 @@ gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level unsigned gl2_wb = G_586_GL2_WB(gcr_cntl); unsigned gcr_seq = G_586_SEQ(gcr_cntl); - gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV & C_586_GLV_INV & C_586_GL1_INV & C_586_GL2_INV & - C_586_GL2_WB; /* keep SEQ */ + gcr_cntl &= + C_586_GLM_WB & C_586_GLM_INV & C_586_GLV_INV & C_586_GL1_INV & C_586_GL2_INV & C_586_GL2_WB; /* keep SEQ */ assert(flush_cnt); (*flush_cnt)++; - si_cs_emit_write_event_eop( - cs, gfx_level, false, cb_db_event, - S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) | S_490_GLV_INV(glv_inv) | - S_490_GL1_INV(gl1_inv) | S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) | - S_490_SEQ(gcr_seq), - EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, flush_va, *flush_cnt, gfx9_eop_bug_va); + si_cs_emit_write_event_eop(cs, gfx_level, false, cb_db_event, + S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) | S_490_GLV_INV(glv_inv) | + S_490_GL1_INV(gl1_inv) | S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) | + S_490_SEQ(gcr_seq), + EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, flush_va, *flush_cnt, gfx9_eop_bug_va); radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 0xffffffff); } @@ -1321,9 +1260,8 @@ gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level radeon_emit(cs, 0); /* CP_COHER_BASE_HI */ radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */ radeon_emit(cs, gcr_cntl); /* GCR_CNTL */ - } else if ((cb_db_event || - (flush_bits & (RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | - RADV_CMD_FLAG_CS_PARTIAL_FLUSH))) && + } else if ((cb_db_event || (flush_bits & (RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | + RADV_CMD_FLAG_CS_PARTIAL_FLUSH))) && !is_mec) { /* We need to ensure that PFP waits as well. */ radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); @@ -1342,21 +1280,19 @@ gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level } void -si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, - enum amd_gfx_level gfx_level, uint32_t *flush_cnt, uint64_t flush_va, - bool is_mec, enum radv_cmd_flush_bits flush_bits, +si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, + uint32_t *flush_cnt, uint64_t flush_va, bool is_mec, enum radv_cmd_flush_bits flush_bits, enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va) { unsigned cp_coher_cntl = 0; - uint32_t flush_cb_db = - flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB); + uint32_t flush_cb_db = flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB); radeon_check_space(ws, cs, 128); if (gfx_level >= GFX10) { /* GFX10 cache flush handling is quite different. 
*/ - gfx10_cs_emit_cache_flush(cs, gfx_level, flush_cnt, flush_va, is_mec, flush_bits, - sqtt_flush_bits, gfx9_eop_bug_va); + gfx10_cs_emit_cache_flush(cs, gfx_level, flush_cnt, flush_va, is_mec, flush_bits, sqtt_flush_bits, + gfx9_eop_bug_va); return; } @@ -1371,17 +1307,15 @@ si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, if (gfx_level <= GFX8) { if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) { - cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | S_0085F0_CB0_DEST_BASE_ENA(1) | - S_0085F0_CB1_DEST_BASE_ENA(1) | S_0085F0_CB2_DEST_BASE_ENA(1) | - S_0085F0_CB3_DEST_BASE_ENA(1) | S_0085F0_CB4_DEST_BASE_ENA(1) | - S_0085F0_CB5_DEST_BASE_ENA(1) | S_0085F0_CB6_DEST_BASE_ENA(1) | - S_0085F0_CB7_DEST_BASE_ENA(1); + cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | S_0085F0_CB0_DEST_BASE_ENA(1) | S_0085F0_CB1_DEST_BASE_ENA(1) | + S_0085F0_CB2_DEST_BASE_ENA(1) | S_0085F0_CB3_DEST_BASE_ENA(1) | + S_0085F0_CB4_DEST_BASE_ENA(1) | S_0085F0_CB5_DEST_BASE_ENA(1) | + S_0085F0_CB6_DEST_BASE_ENA(1) | S_0085F0_CB7_DEST_BASE_ENA(1); /* Necessary for DCC */ if (gfx_level >= GFX8) { - si_cs_emit_write_event_eop(cs, gfx_level, is_mec, V_028A90_FLUSH_AND_INV_CB_DATA_TS, 0, - EOP_DST_SEL_MEM, EOP_DATA_SEL_DISCARD, 0, 0, - gfx9_eop_bug_va); + si_cs_emit_write_event_eop(cs, gfx_level, is_mec, V_028A90_FLUSH_AND_INV_CB_DATA_TS, 0, EOP_DST_SEL_MEM, + EOP_DATA_SEL_DISCARD, 0, 0, gfx9_eop_bug_va); } *sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB; @@ -1446,8 +1380,7 @@ si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, */ tc_flags = EVENT_TC_ACTION_ENA | EVENT_TC_MD_ACTION_ENA; - *sqtt_flush_bits |= - RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB | RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB; + *sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB | RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB; /* Ideally flush TC together with CB/DB. */ if (flush_bits & RADV_CMD_FLAG_INV_L2) { @@ -1463,8 +1396,8 @@ si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, assert(flush_cnt); (*flush_cnt)++; - si_cs_emit_write_event_eop(cs, gfx_level, false, cb_db_event, tc_flags, EOP_DST_SEL_MEM, - EOP_DATA_SEL_VALUE_32BIT, flush_va, *flush_cnt, gfx9_eop_bug_va); + si_cs_emit_write_event_eop(cs, gfx_level, false, cb_db_event, tc_flags, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, + flush_va, *flush_cnt, gfx9_eop_bug_va); radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 0xffffffff); } @@ -1492,8 +1425,7 @@ si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, *sqtt_flush_bits |= RGP_FLUSH_PFP_SYNC_ME; } - if ((flush_bits & RADV_CMD_FLAG_INV_L2) || - (gfx_level <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) { + if ((flush_bits & RADV_CMD_FLAG_INV_L2) || (gfx_level <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) { si_emit_acquire_mem(cs, is_mec, gfx_level == GFX9, cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) | S_0301F0_TC_WB_ACTION_ENA(gfx_level >= GFX8)); @@ -1508,16 +1440,14 @@ si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, * * WB doesn't work without NC. 
*/ - si_emit_acquire_mem( - cs, is_mec, gfx_level == GFX9, - cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1)); + si_emit_acquire_mem(cs, is_mec, gfx_level == GFX9, + cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1)); cp_coher_cntl = 0; *sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2 | RGP_FLUSH_INVAL_VMEM_L0; } if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) { - si_emit_acquire_mem(cs, is_mec, gfx_level == GFX9, - cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1)); + si_emit_acquire_mem(cs, is_mec, gfx_level == GFX9, cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1)); cp_coher_cntl = 0; *sqtt_flush_bits |= RGP_FLUSH_INVAL_VMEM_L0; @@ -1546,11 +1476,10 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) if (is_compute) cmd_buffer->state.flush_bits &= - ~(RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META | - RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META | - RADV_CMD_FLAG_INV_L2_METADATA | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | - RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_VGT_FLUSH | - RADV_CMD_FLAG_START_PIPELINE_STATS | RADV_CMD_FLAG_STOP_PIPELINE_STATS); + ~(RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META | RADV_CMD_FLAG_FLUSH_AND_INV_DB | + RADV_CMD_FLAG_FLUSH_AND_INV_DB_META | RADV_CMD_FLAG_INV_L2_METADATA | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | + RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_VGT_FLUSH | RADV_CMD_FLAG_START_PIPELINE_STATS | + RADV_CMD_FLAG_STOP_PIPELINE_STATS); if (!cmd_buffer->state.flush_bits) { radv_describe_barrier_end_delayed(cmd_buffer); @@ -1558,9 +1487,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) } si_cs_emit_cache_flush(cmd_buffer->device->ws, cmd_buffer->cs, - cmd_buffer->device->physical_device->rad_info.gfx_level, - &cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va, - radv_cmd_buffer_uses_mec(cmd_buffer), cmd_buffer->state.flush_bits, + cmd_buffer->device->physical_device->rad_info.gfx_level, &cmd_buffer->gfx9_fence_idx, + cmd_buffer->gfx9_fence_va, radv_cmd_buffer_uses_mec(cmd_buffer), cmd_buffer->state.flush_bits, &cmd_buffer->state.sqtt_flush_bits, cmd_buffer->gfx9_eop_bug_va); if (unlikely(cmd_buffer->device->trace_bo)) @@ -1586,8 +1514,7 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) /* sets the CP predication state using a boolean stored at va */ void -si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible, - unsigned pred_op, uint64_t va) +si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible, unsigned pred_op, uint64_t va) { uint32_t op = 0; @@ -1634,8 +1561,9 @@ si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visi static inline unsigned cp_dma_max_byte_count(enum amd_gfx_level gfx_level) { - unsigned max = gfx_level >= GFX11 ? 32767 : - gfx_level >= GFX9 ? S_415_BYTE_COUNT_GFX9(~0u) : S_415_BYTE_COUNT_GFX6(~0u); + unsigned max = gfx_level >= GFX11 ? 32767 + : gfx_level >= GFX9 ? S_415_BYTE_COUNT_GFX9(~0u) + : S_415_BYTE_COUNT_GFX6(~0u); /* make it aligned for optimal performance */ return max & ~(SI_CPDMA_ALIGNMENT - 1); @@ -1646,8 +1574,8 @@ cp_dma_max_byte_count(enum amd_gfx_level gfx_level) * clear value. 
*/ static void -si_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool predicating, - uint64_t dst_va, uint64_t src_va, unsigned size, unsigned flags) +si_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool predicating, uint64_t dst_va, + uint64_t src_va, unsigned size, unsigned flags) { uint32_t header = 0, command = 0; @@ -1673,8 +1601,7 @@ si_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool pre command |= S_415_RAW_WAIT(1); /* Src and dst flags. */ - if (device->physical_device->rad_info.gfx_level >= GFX9 && !(flags & CP_DMA_CLEAR) && - src_va == dst_va) + if (device->physical_device->rad_info.gfx_level >= GFX9 && !(flags & CP_DMA_CLEAR) && src_va == dst_va) header |= S_411_DST_SEL(V_411_NOWHERE); /* prefetch only */ else if (flags & CP_DMA_USE_L2) header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2); @@ -1705,8 +1632,7 @@ si_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool pre } static void -si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t src_va, unsigned size, - unsigned flags) +si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t src_va, unsigned size, unsigned flags) { struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radv_device *device = cmd_buffer->device; @@ -1734,8 +1660,8 @@ si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t src } void -si_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, - unsigned size, bool predicating) +si_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, unsigned size, + bool predicating) { struct radeon_winsys *ws = device->ws; enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level; @@ -1749,16 +1675,13 @@ si_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs radeon_check_space(ws, cs, 9); uint64_t aligned_va = va & ~(SI_CPDMA_ALIGNMENT - 1); - uint64_t aligned_size = - ((va + size + SI_CPDMA_ALIGNMENT - 1) & ~(SI_CPDMA_ALIGNMENT - 1)) - aligned_va; + uint64_t aligned_size = ((va + size + SI_CPDMA_ALIGNMENT - 1) & ~(SI_CPDMA_ALIGNMENT - 1)) - aligned_va; if (gfx_level >= GFX9) { - command |= S_415_BYTE_COUNT_GFX9(aligned_size) | - S_415_DISABLE_WR_CONFIRM_GFX9(1); + command |= S_415_BYTE_COUNT_GFX9(aligned_size) | S_415_DISABLE_WR_CONFIRM_GFX9(1); header |= S_411_DST_SEL(V_411_NOWHERE); } else { - command |= S_415_BYTE_COUNT_GFX6(aligned_size) | - S_415_DISABLE_WR_CONFIRM_GFX6(1); + command |= S_415_BYTE_COUNT_GFX6(aligned_size) | S_415_DISABLE_WR_CONFIRM_GFX6(1); header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2); } @@ -1776,16 +1699,14 @@ si_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size) { - si_cs_cp_dma_prefetch(cmd_buffer->device, cmd_buffer->cs, va, size, - cmd_buffer->state.predicating); + si_cs_cp_dma_prefetch(cmd_buffer->device, cmd_buffer->cs, va, size, cmd_buffer->state.predicating); if (unlikely(cmd_buffer->device->trace_bo)) radv_cmd_buffer_trace_emit(cmd_buffer); } static void -si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count, uint64_t remaining_size, - unsigned *flags) +si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count, uint64_t remaining_size, unsigned *flags) { /* Flush the caches for the first copy only. 
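As a minimal standalone sketch (assuming SI_CPDMA_ALIGNMENT is a power of two; the value 32 below is illustrative only), the alignment arithmetic used by the CP DMA prefetch path in the hunk above can be written as:

/* Align the prefetch window so it starts and ends on SI_CPDMA_ALIGNMENT
 * boundaries while still covering [va, va + size).  This mirrors the
 * aligned_va / aligned_size computation shown in si_cs_cp_dma_prefetch()
 * above; names and the alignment value here are illustrative. */
#include <assert.h>
#include <stdint.h>

#define SI_CPDMA_ALIGNMENT 32 /* assumed power-of-two value, for illustration */

static void
cp_dma_align_window(uint64_t va, uint64_t size, uint64_t *aligned_va, uint64_t *aligned_size)
{
   /* Round the start address down to the alignment. */
   *aligned_va = va & ~(uint64_t)(SI_CPDMA_ALIGNMENT - 1);

   /* Round the end address up to the alignment, then take the distance
    * from the aligned start.  E.g. va = 0x1005, size = 10 gives
    * aligned_va = 0x1000 and aligned_size = 0x20. */
   *aligned_size = ((va + size + SI_CPDMA_ALIGNMENT - 1) & ~(uint64_t)(SI_CPDMA_ALIGNMENT - 1)) - *aligned_va;

   /* The aligned window always covers the requested range. */
   assert(*aligned_size >= size);
}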
@@ -1825,8 +1746,7 @@ si_cp_dma_realign_engine(struct radv_cmd_buffer *cmd_buffer, unsigned size) } void -si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va, - uint64_t size) +si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va, uint64_t size) { enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level; uint64_t main_src_va, main_dest_va; @@ -1900,8 +1820,7 @@ si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint6 } void -si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, - unsigned value) +si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, unsigned value) { if (!size) return; @@ -1958,10 +1877,10 @@ si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer) } /* For MSAA sample positions. */ -#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \ - ((((unsigned)(s0x)&0xf) << 0) | (((unsigned)(s0y)&0xf) << 4) | (((unsigned)(s1x)&0xf) << 8) | \ - (((unsigned)(s1y)&0xf) << 12) | (((unsigned)(s2x)&0xf) << 16) | \ - (((unsigned)(s2y)&0xf) << 20) | (((unsigned)(s3x)&0xf) << 24) | (((unsigned)(s3y)&0xf) << 28)) +#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \ + ((((unsigned)(s0x)&0xf) << 0) | (((unsigned)(s0y)&0xf) << 4) | (((unsigned)(s1x)&0xf) << 8) | \ + (((unsigned)(s1y)&0xf) << 12) | (((unsigned)(s2x)&0xf) << 16) | (((unsigned)(s2y)&0xf) << 20) | \ + (((unsigned)(s3x)&0xf) << 24) | (((unsigned)(s3y)&0xf) << 28)) /* For obtaining location coordinates from registers */ #define SEXT4(x) ((int)((x) | ((x)&0x8 ? 0xfffffff0 : 0))) @@ -2054,8 +1973,7 @@ radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples) } static void -radv_get_sample_position(struct radv_device *device, unsigned sample_count, unsigned sample_index, - float *out_value) +radv_get_sample_position(struct radv_device *device, unsigned sample_count, unsigned sample_index, float *out_value) { const uint32_t *sample_locs; diff --git a/src/amd/vulkan/vk_format.h b/src/amd/vulkan/vk_format.h index 47a80c8..e42a4d5 100644 --- a/src/amd/vulkan/vk_format.h +++ b/src/amd/vulkan/vk_format.h @@ -43,8 +43,7 @@ vk_format_get_first_non_void_channel(VkFormat format) } static inline enum pipe_swizzle -radv_swizzle_conv(VkComponentSwizzle component, const unsigned char chan[4], - VkComponentSwizzle vk_swiz) +radv_swizzle_conv(VkComponentSwizzle component, const unsigned char chan[4], VkComponentSwizzle vk_swiz) { if (vk_swiz == VK_COMPONENT_SWIZZLE_IDENTITY) vk_swiz = component; @@ -64,8 +63,7 @@ radv_swizzle_conv(VkComponentSwizzle component, const unsigned char chan[4], } static inline void -vk_format_compose_swizzles(const VkComponentMapping *mapping, const unsigned char swz[4], - enum pipe_swizzle dst[4]) +vk_format_compose_swizzles(const VkComponentMapping *mapping, const unsigned char swz[4], enum pipe_swizzle dst[4]) { dst[0] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_R, swz, mapping->r); dst[1] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_G, swz, mapping->g); diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c index 73c3e24..8c26c9f 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c @@ -45,9 +45,8 @@ static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo); static int -radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, amdgpu_bo_handle bo, uint64_t 
offset, - uint64_t size, uint64_t addr, uint32_t bo_flags, uint64_t internal_flags, - uint32_t ops) +radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, amdgpu_bo_handle bo, uint64_t offset, uint64_t size, uint64_t addr, + uint32_t bo_flags, uint64_t internal_flags, uint32_t ops) { uint64_t flags = internal_flags; if (bo) { @@ -78,8 +77,7 @@ radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo) { if (bo->bo_capacity < bo->range_count) { uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count); - struct radv_amdgpu_winsys_bo **bos = - realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *)); + struct radv_amdgpu_winsys_bo **bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *)); if (!bos) return VK_ERROR_OUT_OF_HOST_MEMORY; bo->bos = bos; @@ -108,9 +106,8 @@ radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo) } static VkResult -radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_winsys_bo *_parent, - uint64_t offset, uint64_t size, struct radeon_winsys_bo *_bo, - uint64_t bo_offset) +radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_winsys_bo *_parent, uint64_t offset, + uint64_t size, struct radeon_winsys_bo *_bo, uint64_t bo_offset) { struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent; @@ -128,11 +125,10 @@ radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_wins * will first unmap all existing VA that overlap the requested range and then map. */ if (bo) { - r = radv_amdgpu_bo_va_op(ws, bo->bo, bo_offset, size, parent->base.va + offset, 0, 0, - AMDGPU_VA_OP_REPLACE); + r = radv_amdgpu_bo_va_op(ws, bo->bo, bo_offset, size, parent->base.va + offset, 0, 0, AMDGPU_VA_OP_REPLACE); } else { - r = radv_amdgpu_bo_va_op(ws, NULL, 0, size, parent->base.va + offset, 0, AMDGPU_VM_PAGE_PRT, - AMDGPU_VA_OP_REPLACE); + r = + radv_amdgpu_bo_va_op(ws, NULL, 0, size, parent->base.va + offset, 0, AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_REPLACE); } if (r) { @@ -172,8 +168,7 @@ radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_wins * new parent, or are adjacent to it. This corresponds to the bind ranges * that may change. */ - while (first + 1 < parent->range_count && - parent->ranges[first].offset + parent->ranges[first].size < offset) + while (first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset) ++first; last = first; @@ -192,8 +187,7 @@ radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_wins /* Try to merge the new range with the first range. */ if (parent->ranges[first].bo == bo && - (!bo || - offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) { + (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) { size += offset - parent->ranges[first].offset; offset = parent->ranges[first].offset; bo_offset = parent->ranges[first].bo_offset; @@ -202,8 +196,7 @@ radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_wins /* Try to merge the new range with the last range. 
*/ if (parent->ranges[last].bo == bo && - (!bo || - offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) { + (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) { size = parent->ranges[last].offset + parent->ranges[last].size - offset; remove_last = true; } @@ -294,8 +287,7 @@ radv_amdgpu_global_bo_list_add(struct radv_amdgpu_winsys *ws, struct radv_amdgpu u_rwlock_wrlock(&ws->global_bo_list.lock); if (ws->global_bo_list.count == ws->global_bo_list.capacity) { unsigned capacity = MAX2(4, ws->global_bo_list.capacity * 2); - void *data = - realloc(ws->global_bo_list.bos, capacity * sizeof(struct radv_amdgpu_winsys_bo *)); + void *data = realloc(ws->global_bo_list.bos, capacity * sizeof(struct radv_amdgpu_winsys_bo *)); if (!data) { u_rwlock_wrunlock(&ws->global_bo_list.lock); return VK_ERROR_OUT_OF_HOST_MEMORY; @@ -369,9 +361,8 @@ radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo static VkResult radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned alignment, - enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags, - unsigned priority, uint64_t replay_address, - struct radeon_winsys_bo **out_bo) + enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags, unsigned priority, + uint64_t replay_address, struct radeon_winsys_bo **out_bo) { struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); struct radv_amdgpu_winsys_bo *bo; @@ -398,14 +389,12 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned assert(!replay_address || (flags & RADEON_FLAG_REPLAYABLE)); - const uint64_t va_flags = AMDGPU_VA_RANGE_HIGH | - (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) | + const uint64_t va_flags = AMDGPU_VA_RANGE_HIGH | (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) | (flags & RADEON_FLAG_REPLAYABLE ? AMDGPU_VA_RANGE_REPLAYABLE : 0); - r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, virt_alignment, replay_address, - &va, &va_handle, va_flags); + r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, virt_alignment, replay_address, &va, + &va_handle, va_flags); if (r) { - result = - replay_address ? VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS : VK_ERROR_OUT_OF_DEVICE_MEMORY; + result = replay_address ? VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS : VK_ERROR_OUT_OF_DEVICE_MEMORY; goto error_va_alloc; } @@ -431,8 +420,7 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned bo->ranges[0].bo_offset = 0; /* Reserve a PRT VA region. 
*/ - r = radv_amdgpu_bo_va_op(ws, NULL, 0, size, bo->base.va, 0, AMDGPU_VM_PAGE_PRT, - AMDGPU_VA_OP_MAP); + r = radv_amdgpu_bo_va_op(ws, NULL, 0, size, bo->base.va, 0, AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_MAP); if (r) { fprintf(stderr, "radv/amdgpu: Failed to reserve a PRT VA region (%d).\n", r); result = VK_ERROR_OUT_OF_DEVICE_MEMORY; @@ -583,8 +571,7 @@ radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo) } static uint64_t -radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws, uint64_t size, - unsigned alignment) +radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws, uint64_t size, unsigned alignment) { uint64_t vm_alignment = alignment; @@ -605,8 +592,8 @@ radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws, uint64_t siz } static VkResult -radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws, void *pointer, uint64_t size, - unsigned priority, struct radeon_winsys_bo **out_bo) +radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws, void *pointer, uint64_t size, unsigned priority, + struct radeon_winsys_bo **out_bo) { struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); amdgpu_bo_handle buf_handle; @@ -634,8 +621,8 @@ radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws, void *pointer, uint64_ */ vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size, ws->info.gart_page_size); - if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, vm_alignment, 0, &va, - &va_handle, AMDGPU_VA_RANGE_HIGH)) { + if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, vm_alignment, 0, &va, &va_handle, + AMDGPU_VA_RANGE_HIGH)) { result = VK_ERROR_OUT_OF_DEVICE_MEMORY; goto error_va_alloc; } @@ -678,8 +665,8 @@ error: } static VkResult -radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, int fd, unsigned priority, - struct radeon_winsys_bo **out_bo, uint64_t *alloc_size) +radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, int fd, unsigned priority, struct radeon_winsys_bo **out_bo, + uint64_t *alloc_size) { struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); struct radv_amdgpu_winsys_bo *bo; @@ -716,15 +703,14 @@ radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, int fd, unsigned priori *alloc_size = info.alloc_size; } - r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, result.alloc_size, 1 << 20, 0, - &va, &va_handle, AMDGPU_VA_RANGE_HIGH); + r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, result.alloc_size, 1 << 20, 0, &va, &va_handle, + AMDGPU_VA_RANGE_HIGH); if (r) { vk_result = VK_ERROR_OUT_OF_DEVICE_MEMORY; goto error_query; } - r = - radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size, va, 0, 0, AMDGPU_VA_OP_MAP); + r = radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size, va, 0, 0, AMDGPU_VA_OP_MAP); if (r) { vk_result = VK_ERROR_UNKNOWN; goto error_va_map; @@ -897,10 +883,8 @@ radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys *_ws, struct radeon_wins tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, md->u.gfx9.dcc_offset_256b); tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, md->u.gfx9.dcc_pitch_max); tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, md->u.gfx9.dcc_independent_64b_blocks); - tiling_flags |= - AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, md->u.gfx9.dcc_independent_128b_blocks); - tiling_flags |= - AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, md->u.gfx9.dcc_max_compressed_block_size); + tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, md->u.gfx9.dcc_independent_128b_blocks); + tiling_flags |= 
AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, md->u.gfx9.dcc_max_compressed_block_size); tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout); } else { if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED) @@ -914,8 +898,7 @@ radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys *_ws, struct radeon_wins tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw)); tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh)); if (md->u.legacy.tile_split) - tiling_flags |= - AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split)); + tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split)); tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea)); tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1); @@ -972,8 +955,7 @@ radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys *_ws, struct radeon_wins } static VkResult -radv_amdgpu_winsys_bo_make_resident(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo, - bool resident) +radv_amdgpu_winsys_bo_make_resident(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo, bool resident) { struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); @@ -1008,7 +990,8 @@ radv_amdgpu_bo_va_compare(const void *a, const void *b) return bo_a->base.va < bo_b->base.va ? -1 : bo_a->base.va > bo_b->base.va ? 1 : 0; } -static uint64_t radv_amdgpu_canonicalize_va(uint64_t va) +static uint64_t +radv_amdgpu_canonicalize_va(uint64_t va) { /* Would be less hardcoded to use addr32_hi (0xffff8000) to generate a mask, * but there are confusing differences between page fault reports from kernel where @@ -1027,10 +1010,9 @@ radv_amdgpu_dump_bo_log(struct radeon_winsys *_ws, FILE *file) u_rwlock_rdlock(&ws->log_bo_list_lock); LIST_FOR_EACH_ENTRY (bo_log, &ws->log_bo_list, list) { - fprintf(file, "timestamp=%llu, VA=%.16llx-%.16llx, destroyed=%d, is_virtual=%d\n", - (long long)bo_log->timestamp, (long long)radv_amdgpu_canonicalize_va(bo_log->va), - (long long)radv_amdgpu_canonicalize_va(bo_log->va + bo_log->size), - bo_log->destroyed, bo_log->is_virtual); + fprintf(file, "timestamp=%llu, VA=%.16llx-%.16llx, destroyed=%d, is_virtual=%d\n", (long long)bo_log->timestamp, + (long long)radv_amdgpu_canonicalize_va(bo_log->va), + (long long)radv_amdgpu_canonicalize_va(bo_log->va + bo_log->size), bo_log->destroyed, bo_log->is_virtual); } u_rwlock_rdunlock(&ws->log_bo_list_lock); } @@ -1057,10 +1039,9 @@ radv_amdgpu_dump_bo_ranges(struct radeon_winsys *_ws, FILE *file) qsort(bos, ws->global_bo_list.count, sizeof(bos[0]), radv_amdgpu_bo_va_compare); for (i = 0; i < ws->global_bo_list.count; ++i) { - fprintf(file, " VA=%.16llx-%.16llx, handle=%d%s\n", - (long long)radv_amdgpu_canonicalize_va(bos[i]->base.va), - (long long)radv_amdgpu_canonicalize_va(bos[i]->base.va + bos[i]->size), - bos[i]->bo_handle, bos[i]->is_virtual ? " sparse" : ""); + fprintf(file, " VA=%.16llx-%.16llx, handle=%d%s\n", (long long)radv_amdgpu_canonicalize_va(bos[i]->base.va), + (long long)radv_amdgpu_canonicalize_va(bos[i]->base.va + bos[i]->size), bos[i]->bo_handle, + bos[i]->is_virtual ? 
" sparse" : ""); } free(bos); u_rwlock_rdunlock(&ws->global_bo_list.lock); diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index 644a504..0c131fd 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -110,8 +110,7 @@ radeon_emit_unchecked(struct radeon_cmdbuf *cs, uint32_t value) cs->buf[cs->cdw++] = value; } -static uint32_t radv_amdgpu_ctx_queue_syncobj(struct radv_amdgpu_ctx *ctx, unsigned ip, - unsigned ring); +static uint32_t radv_amdgpu_ctx_queue_syncobj(struct radv_amdgpu_ctx *ctx, unsigned ip, unsigned ring); static inline struct radv_amdgpu_cs * radv_amdgpu_cs(struct radeon_cmdbuf *base) @@ -120,8 +119,7 @@ radv_amdgpu_cs(struct radeon_cmdbuf *base) } static bool -ring_can_use_ib_bos(const struct radv_amdgpu_winsys *ws, - enum amd_ip_type ip_type) +ring_can_use_ib_bos(const struct radv_amdgpu_winsys *ws, enum amd_ip_type ip_type) { return ws->use_ib_bos && (ip_type == AMD_IP_GFX || ip_type == AMD_IP_COMPUTE); } @@ -159,8 +157,7 @@ struct radv_amdgpu_cs_request { uint64_t seq_no; }; -static VkResult radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, - struct radv_amdgpu_cs_request *request, +static VkResult radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_cs_request *request, struct radv_winsys_sem_info *sem_info); static void @@ -242,11 +239,11 @@ radv_amdgpu_cs_bo_create(struct radv_amdgpu_cs *cs, uint32_t ib_size) const bool avoid_vram = cs->is_secondary && !can_always_use_ib2; const enum radeon_bo_domain domain = avoid_vram ? RADEON_DOMAIN_GTT : radv_amdgpu_cs_domain(ws); const enum radeon_bo_flag gtt_wc_flag = avoid_vram ? 0 : RADEON_FLAG_GTT_WC; - const enum radeon_bo_flag flags = RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_READ_ONLY | gtt_wc_flag; + const enum radeon_bo_flag flags = + RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | gtt_wc_flag; - return ws->buffer_create(ws, ib_size, cs->ws->info.ib_alignment, domain, flags, - RADV_BO_PRIORITY_CS, 0, &cs->ib_buffer); + return ws->buffer_create(ws, ib_size, cs->ws->info.ib_alignment, domain, flags, RADV_BO_PRIORITY_CS, 0, + &cs->ib_buffer); } static struct radeon_cmdbuf * @@ -292,12 +289,13 @@ radv_amdgpu_cs_create(struct radeon_winsys *ws, enum amd_ip_type ip_type, bool i return &cs->base; } -static uint32_t get_nop_packet(struct radv_amdgpu_cs *cs) +static uint32_t +get_nop_packet(struct radv_amdgpu_cs *cs) { - switch(cs->hw_ip) { + switch (cs->hw_ip) { case AMDGPU_HW_IP_GFX: case AMDGPU_HW_IP_COMPUTE: - return cs->ws->info.gfx_ib_pad_with_type2 ? PKT2_NOP_PAD : PKT3_NOP_PAD; + return cs->ws->info.gfx_ib_pad_with_type2 ? PKT2_NOP_PAD : PKT3_NOP_PAD; case AMDGPU_HW_IP_DMA: return cs->ws->info.gfx_level <= GFX6 ? 
0xF0000000 : SDMA_NOP_PAD; case AMDGPU_HW_IP_UVD: @@ -460,8 +458,7 @@ radv_amdgpu_cs_reset(struct radeon_cmdbuf *_cs) } for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) { - unsigned hash = - ((uintptr_t)cs->virtual_buffers[i] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1); + unsigned hash = ((uintptr_t)cs->virtual_buffers[i] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1); cs->virtual_buffer_hash_table[hash] = -1; } @@ -530,8 +527,7 @@ radv_amdgpu_cs_chain(struct radeon_cmdbuf *cs, struct radeon_cmdbuf *next_cs, bo cs->buf[cs->cdw - 4] = PKT3(PKT3_INDIRECT_BUFFER, 2, 0); cs->buf[cs->cdw - 3] = next_acs->ib.ib_mc_address; cs->buf[cs->cdw - 2] = next_acs->ib.ib_mc_address >> 32; - cs->buf[cs->cdw - 1] = - S_3F2_CHAIN(1) | S_3F2_VALID(1) | S_3F2_PRE_ENA(pre_ena) | next_acs->ib.size; + cs->buf[cs->cdw - 1] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | S_3F2_PRE_ENA(pre_ena) | next_acs->ib.size; return true; } @@ -656,8 +652,7 @@ radv_amdgpu_cs_add_buffer(struct radeon_cmdbuf *_cs, struct radeon_winsys_bo *_b } static void -radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf *_parent, struct radeon_cmdbuf *_child, - bool allow_ib2) +radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf *_parent, struct radeon_cmdbuf *_child, bool allow_ib2) { struct radv_amdgpu_cs *parent = radv_amdgpu_cs(_parent); struct radv_amdgpu_cs *child = radv_amdgpu_cs(_child); @@ -668,8 +663,7 @@ radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf *_parent, struct radeon_cm return; for (unsigned i = 0; i < child->num_buffers; ++i) { - radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i].bo_handle, - child->handles[i].bo_priority); + radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i].bo_handle, child->handles[i].bo_priority); } for (unsigned i = 0; i < child->num_virtual_buffers; ++i) { @@ -753,8 +747,7 @@ radv_amdgpu_count_cs_array_bo(struct radeon_cmdbuf **cs_array, unsigned num_cs) } static unsigned -radv_amdgpu_add_cs_to_bo_list(struct radv_amdgpu_cs *cs, struct drm_amdgpu_bo_list_entry *handles, - unsigned num_handles) +radv_amdgpu_add_cs_to_bo_list(struct radv_amdgpu_cs *cs, struct drm_amdgpu_bo_list_entry *handles, unsigned num_handles) { if (!cs->num_buffers) return num_handles; @@ -814,8 +807,7 @@ radv_amdgpu_add_cs_array_to_bo_list(struct radeon_cmdbuf **cs_array, unsigned nu } static unsigned -radv_amdgpu_copy_global_bo_list(struct radv_amdgpu_winsys *ws, - struct drm_amdgpu_bo_list_entry *handles) +radv_amdgpu_copy_global_bo_list(struct radv_amdgpu_winsys *ws, struct drm_amdgpu_bo_list_entry *handles) { for (uint32_t i = 0; i < ws->global_bo_list.count; i++) { handles[i].bo_handle = ws->global_bo_list.bos[i]->bo_handle; @@ -826,12 +818,10 @@ radv_amdgpu_copy_global_bo_list(struct radv_amdgpu_winsys *ws, } static VkResult -radv_amdgpu_get_bo_list(struct radv_amdgpu_winsys *ws, struct radeon_cmdbuf **cs_array, - unsigned count, struct radeon_cmdbuf **initial_preamble_array, - unsigned num_initial_preambles, - struct radeon_cmdbuf **continue_preamble_array, - unsigned num_continue_preambles, struct radeon_cmdbuf **postamble_array, - unsigned num_postambles, unsigned *rnum_handles, +radv_amdgpu_get_bo_list(struct radv_amdgpu_winsys *ws, struct radeon_cmdbuf **cs_array, unsigned count, + struct radeon_cmdbuf **initial_preamble_array, unsigned num_initial_preambles, + struct radeon_cmdbuf **continue_preamble_array, unsigned num_continue_preambles, + struct radeon_cmdbuf **postamble_array, unsigned num_postambles, unsigned *rnum_handles, struct drm_amdgpu_bo_list_entry **rhandles) { struct 
drm_amdgpu_bo_list_entry *handles = NULL; @@ -844,8 +834,8 @@ radv_amdgpu_get_bo_list(struct radv_amdgpu_winsys *ws, struct radeon_cmdbuf **cs num_handles = radv_amdgpu_copy_global_bo_list(ws, handles); } else if (count == 1 && !num_initial_preambles && !num_continue_preambles && !num_postambles && - !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers && - !radv_amdgpu_cs(cs_array[0])->chained_to && !ws->global_bo_list.count) { + !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers && !radv_amdgpu_cs(cs_array[0])->chained_to && + !ws->global_bo_list.count) { struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)cs_array[0]; if (cs->num_buffers == 0) return VK_SUCCESS; @@ -859,10 +849,8 @@ radv_amdgpu_get_bo_list(struct radv_amdgpu_winsys *ws, struct radeon_cmdbuf **cs } else { unsigned total_buffer_count = ws->global_bo_list.count; total_buffer_count += radv_amdgpu_count_cs_array_bo(cs_array, count); - total_buffer_count += - radv_amdgpu_count_cs_array_bo(initial_preamble_array, num_initial_preambles); - total_buffer_count += - radv_amdgpu_count_cs_array_bo(continue_preamble_array, num_continue_preambles); + total_buffer_count += radv_amdgpu_count_cs_array_bo(initial_preamble_array, num_initial_preambles); + total_buffer_count += radv_amdgpu_count_cs_array_bo(continue_preamble_array, num_continue_preambles); total_buffer_count += radv_amdgpu_count_cs_array_bo(postamble_array, num_postambles); if (total_buffer_count == 0) @@ -874,12 +862,11 @@ radv_amdgpu_get_bo_list(struct radv_amdgpu_winsys *ws, struct radeon_cmdbuf **cs num_handles = radv_amdgpu_copy_global_bo_list(ws, handles); num_handles = radv_amdgpu_add_cs_array_to_bo_list(cs_array, count, handles, num_handles); - num_handles = radv_amdgpu_add_cs_array_to_bo_list( - initial_preamble_array, num_initial_preambles, handles, num_handles); - num_handles = radv_amdgpu_add_cs_array_to_bo_list( - continue_preamble_array, num_continue_preambles, handles, num_handles); num_handles = - radv_amdgpu_add_cs_array_to_bo_list(postamble_array, num_postambles, handles, num_handles); + radv_amdgpu_add_cs_array_to_bo_list(initial_preamble_array, num_initial_preambles, handles, num_handles); + num_handles = + radv_amdgpu_add_cs_array_to_bo_list(continue_preamble_array, num_continue_preambles, handles, num_handles); + num_handles = radv_amdgpu_add_cs_array_to_bo_list(postamble_array, num_postambles, handles, num_handles); } *rhandles = handles; @@ -891,17 +878,16 @@ radv_amdgpu_get_bo_list(struct radv_amdgpu_winsys *ws, struct radeon_cmdbuf **cs static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_cs_request *request) { - radv_amdgpu_request_to_fence(ctx, &ctx->last_submission[request->ip_type][request->ring], - request); + radv_amdgpu_request_to_fence(ctx, &ctx->last_submission[request->ip_type][request->ring], request); } static VkResult -radv_amdgpu_winsys_cs_submit_internal( - struct radv_amdgpu_ctx *ctx, int queue_idx, struct radv_winsys_sem_info *sem_info, - struct radeon_cmdbuf **cs_array, unsigned cs_count, struct radeon_cmdbuf **initial_preamble_cs, - unsigned initial_preamble_count, struct radeon_cmdbuf **continue_preamble_cs, - unsigned continue_preamble_count, struct radeon_cmdbuf **postamble_cs, unsigned postamble_count, - bool uses_shadow_regs) +radv_amdgpu_winsys_cs_submit_internal(struct radv_amdgpu_ctx *ctx, int queue_idx, struct radv_winsys_sem_info *sem_info, + struct radeon_cmdbuf **cs_array, unsigned cs_count, + struct radeon_cmdbuf **initial_preamble_cs, unsigned initial_preamble_count, + struct 
radeon_cmdbuf **continue_preamble_cs, unsigned continue_preamble_count, + struct radeon_cmdbuf **postamble_cs, unsigned postamble_count, + bool uses_shadow_regs) { VkResult result; @@ -910,8 +896,7 @@ radv_amdgpu_winsys_cs_submit_internal( struct radv_amdgpu_winsys *ws = last_cs->ws; assert(cs_count); - const unsigned num_pre_post_cs = - MAX2(initial_preamble_count, continue_preamble_count) + postamble_count; + const unsigned num_pre_post_cs = MAX2(initial_preamble_count, continue_preamble_count) + postamble_count; const unsigned ib_array_size = MIN2(RADV_MAX_IBS_PER_SUBMIT, num_pre_post_cs + cs_count); STACK_ARRAY(struct radv_amdgpu_cs_ib_info, ibs, ib_array_size); @@ -920,9 +905,9 @@ radv_amdgpu_winsys_cs_submit_internal( u_rwlock_rdlock(&ws->global_bo_list.lock); - result = radv_amdgpu_get_bo_list( - ws, &cs_array[0], cs_count, initial_preamble_cs, initial_preamble_count, continue_preamble_cs, - continue_preamble_count, postamble_cs, postamble_count, &num_handles, &handles); + result = radv_amdgpu_get_bo_list(ws, &cs_array[0], cs_count, initial_preamble_cs, initial_preamble_count, + continue_preamble_cs, continue_preamble_count, postamble_cs, postamble_count, + &num_handles, &handles); if (result != VK_SUCCESS) goto fail; @@ -961,8 +946,7 @@ radv_amdgpu_winsys_cs_submit_internal( if (cs_ib_idx == 0) { /* Make sure the whole CS fits into the same submission. */ unsigned cs_num_ib = cs->use_ib ? 1 : cs->num_old_ib_buffers; - if (i + cs_num_ib > ib_per_submit || - ibs_per_ip[cs->hw_ip] + cs_num_ib > max_ib_per_ip[cs->hw_ip]) + if (i + cs_num_ib > ib_per_submit || ibs_per_ip[cs->hw_ip] + cs_num_ib > max_ib_per_ip[cs->hw_ip]) break; if (cs->hw_ip != request.ip_type) { @@ -1066,17 +1050,15 @@ radv_amdgpu_cs_submit_zero(struct radv_amdgpu_ctx *ctx, enum amd_ip_type ip_type for (unsigned i = 0; i < sem_info->wait.timeline_syncobj_count; ++i) { int fd2; ret = amdgpu_cs_syncobj_export_sync_file2( - ctx->ws->dev, sem_info->wait.syncobj[i + sem_info->wait.syncobj_count], - sem_info->wait.points[i], 0, &fd2); + ctx->ws->dev, sem_info->wait.syncobj[i + sem_info->wait.syncobj_count], sem_info->wait.points[i], 0, &fd2); if (ret < 0) { /* This works around a kernel bug where the fence isn't copied if it is already * signalled. Since it is already signalled it is totally fine to not wait on it. 
* * kernel patch: https://patchwork.freedesktop.org/patch/465583/ */ uint64_t point; - ret = amdgpu_cs_syncobj_query2( - ctx->ws->dev, &sem_info->wait.syncobj[i + sem_info->wait.syncobj_count], &point, 1, - 0); + ret = amdgpu_cs_syncobj_query2(ctx->ws->dev, &sem_info->wait.syncobj[i + sem_info->wait.syncobj_count], + &point, 1, 0); if (!ret && point >= sem_info->wait.points[i]) continue; @@ -1116,8 +1098,7 @@ radv_amdgpu_cs_submit_zero(struct radv_amdgpu_ctx *ctx, enum amd_ip_type ip_type } } for (unsigned i = 0; i < sem_info->signal.timeline_syncobj_count; ++i) { - ret = amdgpu_cs_syncobj_transfer(ctx->ws->dev, - sem_info->signal.syncobj[i + sem_info->signal.syncobj_count], + ret = amdgpu_cs_syncobj_transfer(ctx->ws->dev, sem_info->signal.syncobj[i + sem_info->signal.syncobj_count], sem_info->signal.points[i], queue_syncobj, 0, 0); if (ret < 0) return VK_ERROR_DEVICE_LOST; @@ -1126,9 +1107,8 @@ radv_amdgpu_cs_submit_zero(struct radv_amdgpu_ctx *ctx, enum amd_ip_type ip_type } static VkResult -radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, - const struct radv_winsys_submit_info *submit, uint32_t wait_count, - const struct vk_sync_wait *waits, uint32_t signal_count, +radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, const struct radv_winsys_submit_info *submit, + uint32_t wait_count, const struct vk_sync_wait *waits, uint32_t signal_count, const struct vk_sync_signal *signals) { struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx); @@ -1197,10 +1177,9 @@ radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, result = radv_amdgpu_cs_submit_zero(ctx, submit->ip_type, submit->queue_index, &sem_info); } else { result = radv_amdgpu_winsys_cs_submit_internal( - ctx, submit->queue_index, &sem_info, submit->cs_array, submit->cs_count, - submit->initial_preamble_cs, submit->initial_preamble_count, submit->continue_preamble_cs, - submit->continue_preamble_count, submit->postamble_cs, submit->postamble_count, - submit->uses_shadow_regs); + ctx, submit->queue_index, &sem_info, submit->cs_array, submit->cs_count, submit->initial_preamble_cs, + submit->initial_preamble_count, submit->continue_preamble_cs, submit->continue_preamble_count, + submit->postamble_cs, submit->postamble_count, submit->uses_shadow_regs); } out: @@ -1222,8 +1201,7 @@ radv_amdgpu_winsys_get_cpu_addr(void *_cs, uint64_t addr) for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) { struct radv_amdgpu_winsys_bo *bo; - bo = (struct radv_amdgpu_winsys_bo *)(i == cs->num_old_ib_buffers ? cs->ib_buffer - : cs->old_ib_buffers[i].bo); + bo = (struct radv_amdgpu_winsys_bo *)(i == cs->num_old_ib_buffers ? 
cs->ib_buffer : cs->old_ib_buffers[i].bo); if (addr >= bo->base.va && addr - bo->base.va < bo->size) { if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0) return (char *)ret + (addr - bo->base.va); @@ -1245,15 +1223,14 @@ radv_amdgpu_winsys_get_cpu_addr(void *_cs, uint64_t addr) } static void -radv_amdgpu_winsys_cs_dump(struct radeon_cmdbuf *_cs, FILE *file, const int *trace_ids, - int trace_id_count) +radv_amdgpu_winsys_cs_dump(struct radeon_cmdbuf *_cs, FILE *file, const int *trace_ids, int trace_id_count) { struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs; void *ib = radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address); int num_dw = cs->base.cdw; assert(ib); - ac_parse_ib(file, ib, num_dw, trace_ids, trace_id_count, "main IB", cs->ws->info.gfx_level, - cs->ws->info.family, radv_amdgpu_winsys_get_cpu_addr, cs); + ac_parse_ib(file, ib, num_dw, trace_ids, trace_id_count, "main IB", cs->ws->info.gfx_level, cs->ws->info.family, + radv_amdgpu_winsys_get_cpu_addr, cs); } static uint32_t @@ -1274,8 +1251,7 @@ radv_to_amdgpu_priority(enum radeon_ctx_priority radv_priority) } static VkResult -radv_amdgpu_ctx_create(struct radeon_winsys *_ws, enum radeon_ctx_priority priority, - struct radeon_winsys_ctx **rctx) +radv_amdgpu_ctx_create(struct radeon_winsys *_ws, enum radeon_ctx_priority priority, struct radeon_winsys_ctx **rctx) { struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); struct radv_amdgpu_ctx *ctx = CALLOC_STRUCT(radv_amdgpu_ctx); @@ -1299,8 +1275,8 @@ radv_amdgpu_ctx_create(struct radeon_winsys *_ws, enum radeon_ctx_priority prior assert(AMDGPU_HW_IP_NUM * MAX_RINGS_PER_TYPE * 4 * sizeof(uint64_t) <= 4096); result = ws->base.buffer_create(&ws->base, 4096, 8, RADEON_DOMAIN_GTT, - RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING, - RADV_BO_PRIORITY_CS, 0, &ctx->fence_bo); + RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING, RADV_BO_PRIORITY_CS, 0, + &ctx->fence_bo); if (result != VK_SUCCESS) { goto fail_alloc; } @@ -1349,8 +1325,8 @@ radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx, enum amd_ip_type ip_t if (ctx->last_submission[ip_type][ring_index].fence.fence) { uint32_t expired; - int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index].fence, - 1000000000ull, 0, &expired); + int ret = + amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index].fence, 1000000000ull, 0, &expired); if (ret || !expired) return false; @@ -1438,8 +1414,7 @@ radv_amdgpu_cs_alloc_syncobj_chunk(struct radv_winsys_sem_counts *counts, uint32 struct drm_amdgpu_cs_chunk *chunk, int chunk_id) { unsigned count = counts->syncobj_count + (queue_syncobj ? 1 : 0); - struct drm_amdgpu_cs_chunk_sem *syncobj = - malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * count); + struct drm_amdgpu_cs_chunk_sem *syncobj = malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * count); if (!syncobj) return NULL; @@ -1458,14 +1433,11 @@ radv_amdgpu_cs_alloc_syncobj_chunk(struct radv_winsys_sem_counts *counts, uint32 } static void * -radv_amdgpu_cs_alloc_timeline_syncobj_chunk(struct radv_winsys_sem_counts *counts, - uint32_t queue_syncobj, +radv_amdgpu_cs_alloc_timeline_syncobj_chunk(struct radv_winsys_sem_counts *counts, uint32_t queue_syncobj, struct drm_amdgpu_cs_chunk *chunk, int chunk_id) { - uint32_t count = - counts->syncobj_count + counts->timeline_syncobj_count + (queue_syncobj ? 
1 : 0); - struct drm_amdgpu_cs_chunk_syncobj *syncobj = - malloc(sizeof(struct drm_amdgpu_cs_chunk_syncobj) * count); + uint32_t count = counts->syncobj_count + counts->timeline_syncobj_count + (queue_syncobj ? 1 : 0); + struct drm_amdgpu_cs_chunk_syncobj *syncobj = malloc(sizeof(struct drm_amdgpu_cs_chunk_syncobj) * count); if (!syncobj) return NULL; @@ -1498,12 +1470,9 @@ radv_amdgpu_cs_alloc_timeline_syncobj_chunk(struct radv_winsys_sem_counts *count static bool radv_amdgpu_cs_has_user_fence(struct radv_amdgpu_cs_request *request) { - return request->ip_type != AMDGPU_HW_IP_UVD && - request->ip_type != AMDGPU_HW_IP_VCE && - request->ip_type != AMDGPU_HW_IP_UVD_ENC && - request->ip_type != AMDGPU_HW_IP_VCN_DEC && - request->ip_type != AMDGPU_HW_IP_VCN_ENC && - request->ip_type != AMDGPU_HW_IP_VCN_JPEG; + return request->ip_type != AMDGPU_HW_IP_UVD && request->ip_type != AMDGPU_HW_IP_VCE && + request->ip_type != AMDGPU_HW_IP_UVD_ENC && request->ip_type != AMDGPU_HW_IP_VCN_DEC && + request->ip_type != AMDGPU_HW_IP_VCN_ENC && request->ip_type != AMDGPU_HW_IP_VCN_JPEG; } static VkResult @@ -1581,16 +1550,15 @@ radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_cs_request amdgpu_cs_chunk_fence_info_to_data(&fence_info, &chunk_data[i]); } - if (sem_info->cs_emit_wait && (sem_info->wait.timeline_syncobj_count || - sem_info->wait.syncobj_count || *queue_syncobj_wait)) { + if (sem_info->cs_emit_wait && + (sem_info->wait.timeline_syncobj_count || sem_info->wait.syncobj_count || *queue_syncobj_wait)) { if (ctx->ws->info.has_timeline_syncobj) { - wait_syncobj = radv_amdgpu_cs_alloc_timeline_syncobj_chunk( - &sem_info->wait, queue_syncobj, &chunks[num_chunks], - AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT); + wait_syncobj = radv_amdgpu_cs_alloc_timeline_syncobj_chunk(&sem_info->wait, queue_syncobj, &chunks[num_chunks], + AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT); } else { - wait_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk( - &sem_info->wait, queue_syncobj, &chunks[num_chunks], AMDGPU_CHUNK_ID_SYNCOBJ_IN); + wait_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->wait, queue_syncobj, &chunks[num_chunks], + AMDGPU_CHUNK_ID_SYNCOBJ_IN); } if (!wait_syncobj) { result = VK_ERROR_OUT_OF_HOST_MEMORY; @@ -1605,11 +1573,10 @@ radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_cs_request if (sem_info->cs_emit_signal) { if (ctx->ws->info.has_timeline_syncobj) { signal_syncobj = radv_amdgpu_cs_alloc_timeline_syncobj_chunk( - &sem_info->signal, queue_syncobj, &chunks[num_chunks], - AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL); + &sem_info->signal, queue_syncobj, &chunks[num_chunks], AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL); } else { - signal_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk( - &sem_info->signal, queue_syncobj, &chunks[num_chunks], AMDGPU_CHUNK_ID_SYNCOBJ_OUT); + signal_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->signal, queue_syncobj, &chunks[num_chunks], + AMDGPU_CHUNK_ID_SYNCOBJ_OUT); } if (!signal_syncobj) { result = VK_ERROR_OUT_OF_HOST_MEMORY; diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c index 6ad96d9..6a08fa3 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c @@ -26,8 +26,8 @@ * IN THE SOFTWARE. 
*/ -#include "util/bitset.h" #include "radv_amdgpu_surface.h" +#include "util/bitset.h" #include "radv_amdgpu_winsys.h" #include "radv_private.h" #include "sid.h" @@ -91,8 +91,7 @@ radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws, const struct ac_surf_ config.is_1d = type == RADEON_SURF_TYPE_1D || type == RADEON_SURF_TYPE_1D_ARRAY; config.is_3d = type == RADEON_SURF_TYPE_3D; config.is_cube = type == RADEON_SURF_TYPE_CUBEMAP; - config.is_array = type == RADEON_SURF_TYPE_1D_ARRAY || - type == RADEON_SURF_TYPE_2D_ARRAY; + config.is_array = type == RADEON_SURF_TYPE_1D_ARRAY || type == RADEON_SURF_TYPE_2D_ARRAY; return ac_compute_surface(ws->addrlib, &ws->info, &config, mode, surf); } diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c index 5ef40a8..8884c13 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c @@ -114,8 +114,7 @@ radv_amdgpu_winsys_query_value(struct radeon_winsys *rws, enum radeon_value_id v amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &heap); return heap.heap_usage; case RADEON_VRAM_VIS_USAGE: - amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - &heap); + amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &heap); return heap.heap_usage; case RADEON_GTT_USAGE: amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &heap); @@ -137,8 +136,7 @@ radv_amdgpu_winsys_query_value(struct radeon_winsys *rws, enum radeon_value_id v } static bool -radv_amdgpu_winsys_read_registers(struct radeon_winsys *rws, unsigned reg_offset, - unsigned num_registers, uint32_t *out) +radv_amdgpu_winsys_read_registers(struct radeon_winsys *rws, unsigned reg_offset, unsigned num_registers, uint32_t *out) { struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys *)rws; @@ -240,8 +238,7 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags, /* Check that options don't differ from the existing winsys. 
*/ if (((debug_flags & RADV_DEBUG_ALL_BOS) && !ws->debug_all_bos) || ((debug_flags & RADV_DEBUG_HANG) && !ws->debug_log_bos) || - ((debug_flags & RADV_DEBUG_NO_IBS) && ws->use_ib_bos) || - (perftest_flags != ws->perftest)) { + ((debug_flags & RADV_DEBUG_NO_IBS) && ws->use_ib_bos) || (perftest_flags != ws->perftest)) { fprintf(stderr, "radv/amdgpu: Found options that differ from the existing winsys.\n"); return NULL; } diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h index 47c18f3..52fd4fa 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h @@ -29,8 +29,7 @@ #ifndef RADV_AMDGPU_WINSYS_PUBLIC_H #define RADV_AMDGPU_WINSYS_PUBLIC_H -struct radeon_winsys *radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, - uint64_t perftest_flags, +struct radeon_winsys *radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags, bool reserve_vmid); struct radeon_winsys *radv_dummy_winsys_create(void); diff --git a/src/amd/vulkan/winsys/null/radv_null_bo.c b/src/amd/vulkan/winsys/null/radv_null_bo.c index d3df357..a13a75c 100644 --- a/src/amd/vulkan/winsys/null/radv_null_bo.c +++ b/src/amd/vulkan/winsys/null/radv_null_bo.c @@ -30,8 +30,8 @@ static VkResult radv_null_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned alignment, - enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags, - unsigned priority, uint64_t address, struct radeon_winsys_bo **out_bo) + enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags, unsigned priority, + uint64_t address, struct radeon_winsys_bo **out_bo) { struct radv_null_winsys_bo *bo; @@ -66,8 +66,7 @@ radv_null_winsys_bo_unmap(struct radeon_winsys_bo *_bo) } static VkResult -radv_null_winsys_bo_make_resident(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo, - bool resident) +radv_null_winsys_bo_make_resident(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo, bool resident) { return VK_SUCCESS; } diff --git a/src/amd/vulkan/winsys/null/radv_null_cs.c b/src/amd/vulkan/winsys/null/radv_null_cs.c index 57ba2c0..ecbcd29 100644 --- a/src/amd/vulkan/winsys/null/radv_null_cs.c +++ b/src/amd/vulkan/winsys/null/radv_null_cs.c @@ -40,8 +40,7 @@ radv_null_cs(struct radeon_cmdbuf *base) } static VkResult -radv_null_ctx_create(struct radeon_winsys *_ws, enum radeon_ctx_priority priority, - struct radeon_winsys_ctx **rctx) +radv_null_ctx_create(struct radeon_winsys *_ws, enum radeon_ctx_priority priority, struct radeon_winsys_ctx **rctx) { struct radv_null_ctx *ctx = CALLOC_STRUCT(radv_null_ctx); diff --git a/src/amd/vulkan/winsys/null/radv_null_winsys.c b/src/amd/vulkan/winsys/null/radv_null_winsys.c index f8453a2..a9cf5ab 100644 --- a/src/amd/vulkan/winsys/null/radv_null_winsys.c +++ b/src/amd/vulkan/winsys/null/radv_null_winsys.c @@ -143,27 +143,23 @@ radv_null_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *info) : info->gfx_level >= GFX7 ? 64 * 1024 : 32 * 1024; info->lds_encode_granularity = info->gfx_level >= GFX7 ? 128 * 4 : 64 * 4; - info->lds_alloc_granularity = - info->gfx_level >= GFX10_3 ? 256 * 4 : info->lds_encode_granularity; + info->lds_alloc_granularity = info->gfx_level >= GFX10_3 ? 
256 * 4 : info->lds_encode_granularity; info->max_render_backends = gpu_info[info->family].num_render_backends; info->has_dedicated_vram = gpu_info[info->family].has_dedicated_vram; info->has_packed_math_16bit = info->gfx_level >= GFX9; - info->has_image_load_dcc_bug = - info->family == CHIP_NAVI23 || info->family == CHIP_VANGOGH; + info->has_image_load_dcc_bug = info->family == CHIP_NAVI23 || info->family == CHIP_VANGOGH; info->has_accelerated_dot_product = - info->family == CHIP_VEGA20 || - (info->family >= CHIP_MI100 && info->family != CHIP_NAVI10); + info->family == CHIP_VEGA20 || (info->family >= CHIP_MI100 && info->family != CHIP_NAVI10); info->address32_hi = info->gfx_level >= GFX9 ? 0xffff8000u : 0x0; info->has_rbplus = info->family == CHIP_STONEY || info->gfx_level >= GFX9; info->rbplus_allowed = - info->has_rbplus && - (info->family == CHIP_STONEY || info->family == CHIP_VEGA12 || info->family == CHIP_RAVEN || - info->family == CHIP_RAVEN2 || info->family == CHIP_RENOIR || info->gfx_level >= GFX10_3); + info->has_rbplus && (info->family == CHIP_STONEY || info->family == CHIP_VEGA12 || info->family == CHIP_RAVEN || + info->family == CHIP_RAVEN2 || info->family == CHIP_RENOIR || info->gfx_level >= GFX10_3); info->has_scheduled_fence_dependency = true; info->has_gang_submit = true; -- 2.7.4