#define VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR 4
#define VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR 8
-#define TYPE(type, align) \
- layout(buffer_reference, buffer_reference_align = align, scalar) buffer type##_ref \
- { \
- type value; \
+#define TYPE(type, align) \
+ layout(buffer_reference, buffer_reference_align = align, scalar) buffer type##_ref \
+ { \
+ type value; \
};
#define REF(type) type##_ref
#define NULL 0
#define DEREF(var) var.value
-#define SIZEOF(type) uint32_t(uint64_t(REF(type)(uint64_t(0))+1))
+#define SIZEOF(type) uint32_t(uint64_t(REF(type)(uint64_t(0)) + 1))
#define OFFSET(ptr, offset) (uint64_t(ptr) + offset)
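/* Hypothetical usage sketch (not from this patch): these helpers emulate
 * C-style pointers on top of GL_EXT_buffer_reference. TYPE() declares a
 * *_ref reference type, REF()/DEREF() cast and read through a raw device
 * address, and SIZEOF() increments a null reference by one element to
 * recover the element size. The read_key() helper below is illustrative.
 */
TYPE(uint32_t, 4);

uint32_t
read_key(uint64_t base_addr, uint32_t i)
{
   /* OFFSET() is plain byte arithmetic; SIZEOF(uint32_t) evaluates to 4. */
   REF(uint32_t) key = REF(uint32_t)(OFFSET(base_addr, i * SIZEOF(uint32_t)));
   return DEREF(key);
}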
aabb.min[comp] = otw_matrix[comp][3];
aabb.max[comp] = otw_matrix[comp][3];
for (uint32_t col = 0; col < 3; ++col) {
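/* aabb already holds the translation column; for each matrix column, add the
 * smaller/larger of that column's contribution at the two source bounds.
 * This yields the tightest transformed box without visiting all 8 corners. */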
- aabb.min[comp] += min(otw_matrix[comp][col] * header.aabb.min[col],
- otw_matrix[comp][col] * header.aabb.max[col]);
- aabb.max[comp] += max(otw_matrix[comp][col] * header.aabb.min[col],
- otw_matrix[comp][col] * header.aabb.max[col]);
+ aabb.min[comp] +=
+ min(otw_matrix[comp][col] * header.aabb.min[col], otw_matrix[comp][col] * header.aabb.max[col]);
+ aabb.max[comp] +=
+ max(otw_matrix[comp][col] * header.aabb.min[col], otw_matrix[comp][col] * header.aabb.max[col]);
}
}
return aabb;
do {
/* Perform a memory barrier to refresh the current phase's end counter, in case
* another workgroup changed it. */
- memoryBarrier(
- gl_ScopeDevice, gl_StorageSemanticsBuffer,
- gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
+ memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
+ gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
/* The first invocation of the first workgroup in a new phase is responsible for initiating the
 * switch to a new phase. It is only possible to switch to a new phase once all tasks of the
 * previous phase have been completed. Switching to a new phase and incrementing the phase
 * end counter in turn notifies all invocations for that phase that it is safe to execute.
 */
if (global_task_index == DEREF(header).sync_data.current_phase_end_counter &&
- DEREF(header).sync_data.task_done_counter ==
- DEREF(header).sync_data.current_phase_end_counter) {
+ DEREF(header).sync_data.task_done_counter == DEREF(header).sync_data.current_phase_end_counter) {
if (DEREF(header).sync_data.next_phase_exit_flag != 0) {
DEREF(header).sync_data.phase_index = TASK_INDEX_INVALID;
- memoryBarrier(
- gl_ScopeDevice, gl_StorageSemanticsBuffer,
- gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
+ memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
+ gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
} else {
atomicAdd(DEREF(header).sync_data.phase_index, 1);
- DEREF(header).sync_data.current_phase_start_counter =
- DEREF(header).sync_data.current_phase_end_counter;
+ DEREF(header).sync_data.current_phase_start_counter = DEREF(header).sync_data.current_phase_end_counter;
/* Ensure the changes to the phase index and start/end counters are visible to other
 * workgroups waiting in the loop. */
- memoryBarrier(
- gl_ScopeDevice, gl_StorageSemanticsBuffer,
- gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
+ memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
+ gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
atomicAdd(DEREF(header).sync_data.current_phase_end_counter,
DIV_ROUND_UP(task_count(header), gl_WorkGroupSize.x));
}
num_tasks_to_skip = shared_phase_index - phase_index;
- uint32_t local_task_index =
- global_task_index - DEREF(header).sync_data.current_phase_start_counter;
+ uint32_t local_task_index = global_task_index - DEREF(header).sync_data.current_phase_start_counter;
return local_task_index * gl_WorkGroupSize.x + gl_LocalInvocationID.x;
}
return true;
}
-#define PHASE(header) \
- for (; task_index != TASK_INDEX_INVALID && should_execute_phase(); \
- task_index = fetch_task(header, true))
+#define PHASE(header) \
+ for (; task_index != TASK_INDEX_INVALID && should_execute_phase(); task_index = fetch_task(header, true))
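/* Hypothetical usage sketch: a builder shader seeds task_index with an
 * initial fetch, then PHASE() keeps iterating tasks until the current phase
 * is drained. The header binding and the initial fetch_task() call are
 * assumptions based on the surrounding snippets, not part of this patch.
 */
void
main()
{
   uint32_t task_index = fetch_task(header, false);

   PHASE(header)
   {
      /* ... process the element addressed by task_index ... */
   }
}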
#endif
#endif
};
#define LBVH_RIGHT_CHILD_BIT_SHIFT 29
-#define LBVH_RIGHT_CHILD_BIT (1 << LBVH_RIGHT_CHILD_BIT_SHIFT)
+#define LBVH_RIGHT_CHILD_BIT (1 << LBVH_RIGHT_CHILD_BIT_SHIFT)
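/* Hypothetical helper (name illustrative): a node's parent id can be tagged
 * with bit 29 to record whether the node is that parent's right child. */
uint32_t
encode_parent_id(uint32_t parent, bool is_right_child)
{
   return parent | (is_right_child ? LBVH_RIGHT_CHILD_BIT : 0u);
}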
struct lbvh_node_info {
/* Number of children that have been processed (or are invalid/leaves) in
#define radv_bvh_node_box16 4
#define radv_bvh_node_box32 5
#define radv_bvh_node_instance 6
-#define radv_bvh_node_aabb 7
+#define radv_bvh_node_aabb 7
#define radv_ir_node_triangle 0
#define radv_ir_node_internal 1
#define radv_ir_node_instance 2
-#define radv_ir_node_aabb 3
+#define radv_ir_node_aabb 3
#define RADV_GEOMETRY_OPAQUE (1u << 31)
uint32_t reserved[4];
};
-#define RADV_BVH_ROOT_NODE radv_bvh_node_box32
+#define RADV_BVH_ROOT_NODE radv_bvh_node_box32
#define RADV_BVH_INVALID_NODE 0xffffffffu
/* If the task index is set to this value, there is no
*/
#include "radv_private.h"
-#include "vk_framebuffer.h"
#include "vk_common_entrypoints.h"
+#include "vk_framebuffer.h"
VKAPI_ATTR void VKAPI_CALL
-rage2_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
- const VkRenderPassBeginInfo* pRenderPassBegin,
+rage2_CmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo *pRenderPassBegin,
VkSubpassContents contents)
{
VK_FROM_HANDLE(vk_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
}
VKAPI_ATTR VkResult VKAPI_CALL
-rmv_FlushMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
- const VkMappedMemoryRange *pMemoryRanges)
+rmv_FlushMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount, const VkMappedMemoryRange *pMemoryRanges)
{
RADV_FROM_HANDLE(radv_device, device, _device);
- VkResult res =
- device->layer_dispatch.rmv.FlushMappedMemoryRanges(_device, memoryRangeCount, pMemoryRanges);
+ VkResult res = device->layer_dispatch.rmv.FlushMappedMemoryRanges(_device, memoryRangeCount, pMemoryRanges);
if (res != VK_SUCCESS || !device->vk.memory_trace_data.is_enabled)
return res;
}
VKAPI_ATTR VkResult VKAPI_CALL
-rmv_InvalidateMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
- const VkMappedMemoryRange *pMemoryRanges)
+rmv_InvalidateMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount, const VkMappedMemoryRange *pMemoryRanges)
{
RADV_FROM_HANDLE(radv_device, device, _device);
- VkResult res = device->layer_dispatch.rmv.InvalidateMappedMemoryRanges(_device, memoryRangeCount,
- pMemoryRanges);
+ VkResult res = device->layer_dispatch.rmv.InvalidateMappedMemoryRanges(_device, memoryRangeCount, pMemoryRanges);
if (res != VK_SUCCESS || !device->vk.memory_trace_data.is_enabled)
return res;
* IN THE SOFTWARE.
*/
-#include "util/u_process.h"
#include "meta/radv_meta.h"
+#include "util/u_process.h"
#include "radv_private.h"
#include "vk_acceleration_structure.h"
#include "vk_common_entrypoints.h"
/*
 * TODO: This code is shared with RGP tracing and could be merged into a common helper.
*/
- bool frame_trigger =
- queue->device->rra_trace.elapsed_frames == queue->device->rra_trace.trace_frame;
+ bool frame_trigger = queue->device->rra_trace.elapsed_frames == queue->device->rra_trace.trace_frame;
if (queue->device->rra_trace.elapsed_frames <= queue->device->rra_trace.trace_frame)
++queue->device->rra_trace.elapsed_frames;
bool file_trigger = false;
#ifndef _WIN32
- if (queue->device->rra_trace.trigger_file &&
- access(queue->device->rra_trace.trigger_file, W_OK) == 0) {
+ if (queue->device->rra_trace.trigger_file && access(queue->device->rra_trace.trigger_file, W_OK) == 0) {
if (unlink(queue->device->rra_trace.trigger_file) == 0) {
file_trigger = true;
} else {
t = time(NULL);
now = *localtime(&t);
- snprintf(filename, sizeof(filename), "/tmp/%s_%04d.%02d.%02d_%02d.%02d.%02d.rra",
- util_get_process_name(), 1900 + now.tm_year, now.tm_mon + 1, now.tm_mday, now.tm_hour,
- now.tm_min, now.tm_sec);
+ snprintf(filename, sizeof(filename), "/tmp/%s_%04d.%02d.%02d_%02d.%02d.%02d.rra", util_get_process_name(),
+ 1900 + now.tm_year, now.tm_mon + 1, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec);
VkResult result = radv_rra_dump_trace(_queue, filename);
}
VKAPI_ATTR VkResult VKAPI_CALL
-rra_CreateAccelerationStructureKHR(VkDevice _device,
- const VkAccelerationStructureCreateInfoKHR *pCreateInfo,
+rra_CreateAccelerationStructureKHR(VkDevice _device, const VkAccelerationStructureCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkAccelerationStructureKHR *pAccelerationStructure)
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
- VkResult result = device->layer_dispatch.rra.CreateAccelerationStructureKHR(
- _device, pCreateInfo, pAllocator, pAccelerationStructure);
+ VkResult result = device->layer_dispatch.rra.CreateAccelerationStructureKHR(_device, pCreateInfo, pAllocator,
+ pAccelerationStructure);
if (result != VK_SUCCESS)
return result;
fail_data:
free(data);
fail_as:
- device->layer_dispatch.rra.DestroyAccelerationStructureKHR(_device, *pAccelerationStructure,
- pAllocator);
+ device->layer_dispatch.rra.DestroyAccelerationStructureKHR(_device, *pAccelerationStructure, pAllocator);
*pAccelerationStructure = VK_NULL_HANDLE;
exit:
simple_mtx_unlock(&device->rra_trace.data_mtx);
}
static void
-handle_accel_struct_write(VkCommandBuffer commandBuffer,
- struct vk_acceleration_structure *accel_struct,
+handle_accel_struct_write(VkCommandBuffer commandBuffer, struct vk_acceleration_structure *accel_struct,
struct radv_rra_accel_struct_data *data)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
if (!data->va) {
data->va = vk_acceleration_structure_get_va(accel_struct);
- _mesa_hash_table_u64_insert(cmd_buffer->device->rra_trace.accel_struct_vas, data->va,
- accel_struct);
+ _mesa_hash_table_u64_insert(cmd_buffer->device->rra_trace.accel_struct_vas, data->va, accel_struct);
}
if (!data->buffer)
}
VKAPI_ATTR void VKAPI_CALL
-rra_CmdBuildAccelerationStructuresKHR(
- VkCommandBuffer commandBuffer, uint32_t infoCount,
- const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
- const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
+rra_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
+ const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
+ const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- cmd_buffer->device->layer_dispatch.rra.CmdBuildAccelerationStructuresKHR(
- commandBuffer, infoCount, pInfos, ppBuildRangeInfos);
+ cmd_buffer->device->layer_dispatch.rra.CmdBuildAccelerationStructuresKHR(commandBuffer, infoCount, pInfos,
+ ppBuildRangeInfos);
simple_mtx_lock(&cmd_buffer->device->rra_trace.data_mtx);
for (uint32_t i = 0; i < infoCount; ++i) {
RADV_FROM_HANDLE(vk_acceleration_structure, structure, pInfos[i].dstAccelerationStructure);
- struct hash_entry *entry = _mesa_hash_table_search(
- cmd_buffer->device->rra_trace.accel_structs, structure);
+ struct hash_entry *entry = _mesa_hash_table_search(cmd_buffer->device->rra_trace.accel_structs, structure);
assert(entry);
struct radv_rra_accel_struct_data *data = entry->data;
}
VKAPI_ATTR void VKAPI_CALL
-rra_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer,
- const VkCopyAccelerationStructureInfoKHR *pInfo)
+rra_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
cmd_buffer->device->layer_dispatch.rra.CmdCopyAccelerationStructureKHR(commandBuffer, pInfo);
simple_mtx_lock(&cmd_buffer->device->rra_trace.data_mtx);
RADV_FROM_HANDLE(vk_acceleration_structure, structure, pInfo->dst);
- struct hash_entry *entry =
- _mesa_hash_table_search(cmd_buffer->device->rra_trace.accel_structs, structure);
+ struct hash_entry *entry = _mesa_hash_table_search(cmd_buffer->device->rra_trace.accel_structs, structure);
assert(entry);
struct radv_rra_accel_struct_data *data = entry->data;
const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- cmd_buffer->device->layer_dispatch.rra.CmdCopyMemoryToAccelerationStructureKHR(commandBuffer,
- pInfo);
+ cmd_buffer->device->layer_dispatch.rra.CmdCopyMemoryToAccelerationStructureKHR(commandBuffer, pInfo);
simple_mtx_lock(&cmd_buffer->device->rra_trace.data_mtx);
RADV_FROM_HANDLE(vk_acceleration_structure, structure, pInfo->dst);
- struct hash_entry *entry =
- _mesa_hash_table_search(cmd_buffer->device->rra_trace.accel_structs, structure);
+ struct hash_entry *entry = _mesa_hash_table_search(cmd_buffer->device->rra_trace.accel_structs, structure);
assert(entry);
struct radv_rra_accel_struct_data *data = entry->data;
RADV_FROM_HANDLE(vk_acceleration_structure, structure, _structure);
- struct hash_entry *entry =
- _mesa_hash_table_search(device->rra_trace.accel_structs, structure);
+ struct hash_entry *entry = _mesa_hash_table_search(device->rra_trace.accel_structs, structure);
assert(entry);
struct radv_rra_accel_struct_data *data = entry->data;
* IN THE SOFTWARE.
*/
-#include "vk_common_entrypoints.h"
-#include "wsi_common_entrypoints.h"
#include "radv_cs.h"
#include "radv_private.h"
#include "radv_shader.h"
+#include "vk_common_entrypoints.h"
+#include "wsi_common_entrypoints.h"
#include "ac_rgp.h"
#include "ac_sqtt.h"
void
-radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer,
- struct radv_graphics_pipeline *pipeline)
+radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv_graphics_pipeline *pipeline)
{
const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level;
struct radv_sqtt_shaders_reloc *reloc = pipeline->sqtt_shaders_reloc;
}
static VkResult
-radv_sqtt_reloc_graphics_shaders(struct radv_device *device,
- struct radv_graphics_pipeline *pipeline)
+radv_sqtt_reloc_graphics_shaders(struct radv_device *device, struct radv_graphics_pipeline *pipeline)
{
struct radv_shader_dma_submission *submission = NULL;
struct radv_sqtt_shaders_reloc *reloc;
uint64_t offset = 0;
if (device->shader_use_invisible_vram) {
- submission =
- radv_shader_dma_get_submission(device, reloc->bo, slab_va, code_size);
+ submission = radv_shader_dma_get_submission(device, reloc->bo, slab_va, code_size);
if (!submission)
return VK_ERROR_UNKNOWN;
}
}
static void
-radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer,
- enum rgp_sqtt_marker_general_api_type api_type)
+radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_general_api_type api_type)
{
struct rgp_sqtt_marker_general_api marker = {0};
}
static void
-radv_write_end_general_api_marker(struct radv_cmd_buffer *cmd_buffer,
- enum rgp_sqtt_marker_general_api_type api_type)
+radv_write_end_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_general_api_type api_type)
{
struct rgp_sqtt_marker_general_api marker = {0};
}
static void
-radv_write_event_marker(struct radv_cmd_buffer *cmd_buffer,
- enum rgp_sqtt_marker_event_type api_type, uint32_t vertex_offset_user_data,
- uint32_t instance_offset_user_data, uint32_t draw_index_user_data)
+radv_write_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_event_type api_type,
+ uint32_t vertex_offset_user_data, uint32_t instance_offset_user_data,
+ uint32_t draw_index_user_data)
{
struct rgp_sqtt_marker_event marker = {0};
}
static void
-radv_write_event_with_dims_marker(struct radv_cmd_buffer *cmd_buffer,
- enum rgp_sqtt_marker_event_type api_type, uint32_t x, uint32_t y,
- uint32_t z)
+radv_write_event_with_dims_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_event_type api_type,
+ uint32_t x, uint32_t y, uint32_t z)
{
struct rgp_sqtt_marker_event_with_dims marker = {0};
}
static void
-radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer,
- enum rgp_sqtt_marker_user_event_type type, const char *str)
+radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_user_event_type type,
+ const char *str)
{
if (type == UserEventPop) {
assert(str == NULL);
return;
/* Reserve a command buffer ID for SQTT. */
- enum amd_ip_type ip_type =
- radv_queue_family_to_ring(cmd_buffer->device->physical_device, cmd_buffer->qf);
- union rgp_sqtt_marker_cb_id cb_id =
- ac_sqtt_get_next_cmdbuf_id(&cmd_buffer->device->sqtt, ip_type);
+ enum amd_ip_type ip_type = radv_queue_family_to_ring(cmd_buffer->device->physical_device, cmd_buffer->qf);
+ union rgp_sqtt_marker_cb_id cb_id = ac_sqtt_get_next_cmdbuf_id(&cmd_buffer->device->sqtt, ip_type);
cmd_buffer->sqtt_cb_id = cb_id.all;
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START;
if (likely(!cmd_buffer->device->sqtt.bo))
return;
- radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX,
- UINT_MAX);
+ radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX, UINT_MAX);
}
void
}
void
-radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer,
- VkImageAspectFlagBits aspects)
+radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlagBits aspects)
{
- cmd_buffer->state.current_event_type = (aspects & VK_IMAGE_ASPECT_COLOR_BIT)
- ? EventRenderPassColorClear
- : EventRenderPassDepthStencilClear;
+ cmd_buffer->state.current_event_type =
+ (aspects & VK_IMAGE_ASPECT_COLOR_BIT) ? EventRenderPassColorClear : EventRenderPassDepthStencilClear;
}
void
}
void
-radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_barrier_data *barrier)
+radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, const struct radv_barrier_data *barrier)
{
struct rgp_sqtt_marker_layout_transition marker = {0};
}
static void
-radv_describe_pipeline_bind(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint pipelineBindPoint, struct radv_pipeline *pipeline)
+radv_describe_pipeline_bind(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint pipelineBindPoint,
+ struct radv_pipeline *pipeline)
{
struct rgp_sqtt_marker_pipeline_bind marker = {0};
return VK_SUCCESS;
}
-#define EVENT_MARKER_BASE(cmd_name, api_name, event_name, ...) \
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \
- radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \
- cmd_buffer->state.current_event_type = EventCmd##event_name; \
- cmd_buffer->device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__); \
- cmd_buffer->state.current_event_type = EventInternalUnknown; \
+#define EVENT_MARKER_BASE(cmd_name, api_name, event_name, ...) \
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \
+ radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \
+ cmd_buffer->state.current_event_type = EventCmd##event_name; \
+ cmd_buffer->device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__); \
+ cmd_buffer->state.current_event_type = EventInternalUnknown; \
radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);
-#define EVENT_MARKER_ALIAS(cmd_name, api_name, ...) \
- EVENT_MARKER_BASE(cmd_name, api_name, api_name, __VA_ARGS__);
+#define EVENT_MARKER_ALIAS(cmd_name, api_name, ...) EVENT_MARKER_BASE(cmd_name, api_name, api_name, __VA_ARGS__);
-#define EVENT_MARKER(cmd_name, ...) \
- EVENT_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);
+#define EVENT_MARKER(cmd_name, ...) EVENT_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);
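/* For illustration, EVENT_MARKER(Draw, ...) in sqtt_CmdDraw below expands
 * (modulo the trailing semicolon) to:
 *
 *    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 *    radv_write_begin_general_api_marker(cmd_buffer, ApiCmdDraw);
 *    cmd_buffer->state.current_event_type = EventCmdDraw;
 *    cmd_buffer->device->layer_dispatch.rgp.CmdDraw(commandBuffer, vertexCount, instanceCount, firstVertex,
 *                                                   firstInstance);
 *    cmd_buffer->state.current_event_type = EventInternalUnknown;
 *    radv_write_end_general_api_marker(cmd_buffer, ApiCmdDraw);
 */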
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount,
- uint32_t firstVertex, uint32_t firstInstance)
+sqtt_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex,
+ uint32_t firstInstance)
{
EVENT_MARKER(Draw, commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount,
- uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance)
+sqtt_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex,
+ int32_t vertexOffset, uint32_t firstInstance)
{
- EVENT_MARKER(DrawIndexed, commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset,
- firstInstance);
+ EVENT_MARKER(DrawIndexed, commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
- uint32_t drawCount, uint32_t stride)
+sqtt_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount,
+ uint32_t stride)
{
EVENT_MARKER(DrawIndirect, commandBuffer, buffer, offset, drawCount, stride);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
- uint32_t drawCount, uint32_t stride)
+sqtt_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount,
+ uint32_t stride)
{
EVENT_MARKER(DrawIndexedIndirect, commandBuffer, buffer, offset, drawCount, stride);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
- VkBuffer countBuffer, VkDeviceSize countBufferOffset,
- uint32_t maxDrawCount, uint32_t stride)
+sqtt_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer,
+ VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride)
{
- EVENT_MARKER(DrawIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset,
- maxDrawCount, stride);
+ EVENT_MARKER(DrawIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer,
- VkDeviceSize offset, VkBuffer countBuffer,
- VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
+sqtt_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
+ VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
uint32_t stride)
{
- EVENT_MARKER(DrawIndexedIndirectCount, commandBuffer, buffer, offset, countBuffer,
- countBufferOffset, maxDrawCount, stride);
+ EVENT_MARKER(DrawIndexedIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount,
+ stride);
}
VKAPI_ATTR void VKAPI_CALL
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset,
- VkDeviceSize fillSize, uint32_t data)
+sqtt_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize fillSize,
+ uint32_t data)
{
EVENT_MARKER(FillBuffer, commandBuffer, dstBuffer, dstOffset, fillSize, data);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset,
- VkDeviceSize dataSize, const void *pData)
+sqtt_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize,
+ const void *pData)
{
EVENT_MARKER(UpdateBuffer, commandBuffer, dstBuffer, dstOffset, dataSize, pData);
}
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
- const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
+sqtt_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
- EVENT_MARKER_ALIAS(CopyBufferToImage2, CopyBufferToImage, commandBuffer,
- pCopyBufferToImageInfo);
+ EVENT_MARKER_ALIAS(CopyBufferToImage2, CopyBufferToImage, commandBuffer, pCopyBufferToImageInfo);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
- const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
+sqtt_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
- EVENT_MARKER_ALIAS(CopyImageToBuffer2, CopyImageToBuffer, commandBuffer,
- pCopyImageToBufferInfo);
+ EVENT_MARKER_ALIAS(CopyImageToBuffer2, CopyImageToBuffer, commandBuffer, pCopyImageToBufferInfo);
}
VKAPI_ATTR void VKAPI_CALL
VKAPI_ATTR void VKAPI_CALL
sqtt_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
- const VkClearColorValue *pColor, uint32_t rangeCount,
- const VkImageSubresourceRange *pRanges)
+ const VkClearColorValue *pColor, uint32_t rangeCount, const VkImageSubresourceRange *pRanges)
{
EVENT_MARKER(ClearColorImage, commandBuffer, image_h, imageLayout, pColor, rangeCount, pRanges);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h,
- VkImageLayout imageLayout,
+sqtt_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount,
const VkImageSubresourceRange *pRanges)
{
- EVENT_MARKER(ClearDepthStencilImage, commandBuffer, image_h, imageLayout, pDepthStencil,
- rangeCount, pRanges);
+ EVENT_MARKER(ClearDepthStencilImage, commandBuffer, image_h, imageLayout, pDepthStencil, rangeCount, pRanges);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount,
- const VkClearAttachment *pAttachments, uint32_t rectCount,
- const VkClearRect *pRects)
+sqtt_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment *pAttachments,
+ uint32_t rectCount, const VkClearRect *pRects)
{
EVENT_MARKER(ClearAttachments, commandBuffer, attachmentCount, pAttachments, rectCount, pRects);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdResolveImage2(VkCommandBuffer commandBuffer,
- const VkResolveImageInfo2 *pResolveImageInfo)
+sqtt_CmdResolveImage2(VkCommandBuffer commandBuffer, const VkResolveImageInfo2 *pResolveImageInfo)
{
EVENT_MARKER_ALIAS(ResolveImage2, ResolveImage, commandBuffer, pResolveImageInfo);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents,
- const VkDependencyInfo* pDependencyInfos)
+sqtt_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents,
+ const VkDependencyInfo *pDependencyInfos)
{
- EVENT_MARKER_ALIAS(WaitEvents2, WaitEvents, commandBuffer, eventCount, pEvents,
- pDependencyInfos);
+ EVENT_MARKER_ALIAS(WaitEvents2, WaitEvents, commandBuffer, eventCount, pEvents, pDependencyInfos);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
- const VkDependencyInfo* pDependencyInfo)
+sqtt_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, const VkDependencyInfo *pDependencyInfo)
{
EVENT_MARKER_ALIAS(PipelineBarrier2, PipelineBarrier, commandBuffer, pDependencyInfo);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery,
- uint32_t queryCount)
+sqtt_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount)
{
EVENT_MARKER(ResetQueryPool, commandBuffer, queryPool, firstQuery, queryCount);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
- uint32_t firstQuery, uint32_t queryCount, VkBuffer dstBuffer,
- VkDeviceSize dstOffset, VkDeviceSize stride, VkQueryResultFlags flags)
+sqtt_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery,
+ uint32_t queryCount, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize stride,
+ VkQueryResultFlags flags)
{
- EVENT_MARKER(CopyQueryPoolResults, commandBuffer, queryPool, firstQuery, queryCount, dstBuffer,
- dstOffset, stride, flags);
+ EVENT_MARKER(CopyQueryPoolResults, commandBuffer, queryPool, firstQuery, queryCount, dstBuffer, dstOffset, stride,
+ flags);
}
-#define EVENT_RT_MARKER(cmd_name, ...) \
- EVENT_MARKER_BASE(cmd_name, Dispatch, cmd_name, __VA_ARGS__);
+#define EVENT_RT_MARKER(cmd_name, ...) EVENT_MARKER_BASE(cmd_name, Dispatch, cmd_name, __VA_ARGS__);
-#define EVENT_RT_MARKER_ALIAS(cmd_name, event_name, ...) \
- EVENT_MARKER_BASE(cmd_name, Dispatch, event_name, __VA_ARGS__);
+#define EVENT_RT_MARKER_ALIAS(cmd_name, event_name, ...) EVENT_MARKER_BASE(cmd_name, Dispatch, event_name, __VA_ARGS__);
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdTraceRaysKHR(VkCommandBuffer commandBuffer,
- const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
+sqtt_CmdTraceRaysKHR(VkCommandBuffer commandBuffer, const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
- const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable,
- uint32_t width, uint32_t height, uint32_t depth)
+ const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable, uint32_t width,
+ uint32_t height, uint32_t depth)
{
EVENT_RT_MARKER(TraceRaysKHR, commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable,
pHitShaderBindingTable, pCallableShaderBindingTable, width, height, depth);
const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable,
VkDeviceAddress indirectDeviceAddress)
{
- EVENT_RT_MARKER(TraceRaysIndirectKHR, commandBuffer, pRaygenShaderBindingTable,
- pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable,
- indirectDeviceAddress);
+ EVENT_RT_MARKER(TraceRaysIndirectKHR, commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable,
+ pHitShaderBindingTable, pCallableShaderBindingTable, indirectDeviceAddress);
}
VKAPI_ATTR void VKAPI_CALL
sqtt_CmdTraceRaysIndirect2KHR(VkCommandBuffer commandBuffer, VkDeviceAddress indirectDeviceAddress)
{
- EVENT_RT_MARKER_ALIAS(TraceRaysIndirect2KHR, TraceRaysIndirectKHR, commandBuffer,
- indirectDeviceAddress);
+ EVENT_RT_MARKER_ALIAS(TraceRaysIndirect2KHR, TraceRaysIndirectKHR, commandBuffer, indirectDeviceAddress);
}
VKAPI_ATTR void VKAPI_CALL
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
{
- EVENT_RT_MARKER(BuildAccelerationStructuresKHR, commandBuffer, infoCount, pInfos,
- ppBuildRangeInfos);
+ EVENT_RT_MARKER(BuildAccelerationStructuresKHR, commandBuffer, infoCount, pInfos, ppBuildRangeInfos);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer,
- const VkCopyAccelerationStructureInfoKHR *pInfo)
+sqtt_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
{
EVENT_RT_MARKER(CopyAccelerationStructureKHR, commandBuffer, pInfo);
}
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer buffer,
- VkDeviceSize offset, uint32_t drawCount, uint32_t stride)
+sqtt_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
+ uint32_t drawCount, uint32_t stride)
{
EVENT_MARKER(DrawMeshTasksIndirectEXT, commandBuffer, buffer, offset, drawCount, stride);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer buffer,
- VkDeviceSize offset, VkBuffer countBuffer,
- VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
+sqtt_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
+ VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
uint32_t stride)
{
- EVENT_MARKER(DrawMeshTasksIndirectCountEXT, commandBuffer, buffer, offset, countBuffer,
- countBufferOffset, maxDrawCount, stride);
+ EVENT_MARKER(DrawMeshTasksIndirectCountEXT, commandBuffer, buffer, offset, countBuffer, countBufferOffset,
+ maxDrawCount, stride);
}
#undef EVENT_RT_MARKER_ALIAS
#undef EVENT_MARKER_ALIAS
#undef EVENT_MARKER_BASE
-#define API_MARKER_ALIAS(cmd_name, api_name, ...) \
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \
- radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \
- cmd_buffer->device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__); \
+#define API_MARKER_ALIAS(cmd_name, api_name, ...) \
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \
+ radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \
+ cmd_buffer->device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__); \
radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);
#define API_MARKER(cmd_name, ...) API_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
- VkPipeline _pipeline)
+sqtt_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline _pipeline)
{
RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount,
const uint32_t *pDynamicOffsets)
{
- API_MARKER(BindDescriptorSets, commandBuffer, pipelineBindPoint, layout, firstSet,
- descriptorSetCount, pDescriptorSets, dynamicOffsetCount, pDynamicOffsets);
+ API_MARKER(BindDescriptorSets, commandBuffer, pipelineBindPoint, layout, firstSet, descriptorSetCount,
+ pDescriptorSets, dynamicOffsetCount, pDynamicOffsets);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
- VkIndexType indexType)
+sqtt_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType)
{
API_MARKER(BindIndexBuffer, commandBuffer, buffer, offset, indexType);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding,
- uint32_t bindingCount, const VkBuffer *pBuffers,
- const VkDeviceSize *pOffsets, const VkDeviceSize* pSizes,
- const VkDeviceSize* pStrides)
+sqtt_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount,
+ const VkBuffer *pBuffers, const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes,
+ const VkDeviceSize *pStrides)
{
- API_MARKER_ALIAS(BindVertexBuffers2, BindVertexBuffers, commandBuffer, firstBinding,
- bindingCount, pBuffers, pOffsets, pSizes, pStrides);
+ API_MARKER_ALIAS(BindVertexBuffers2, BindVertexBuffers, commandBuffer, firstBinding, bindingCount, pBuffers,
+ pOffsets, pSizes, pStrides);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query,
- VkQueryControlFlags flags)
+sqtt_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, VkQueryControlFlags flags)
{
API_MARKER(BeginQuery, commandBuffer, queryPool, query, flags);
}
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage,
- VkQueryPool queryPool, uint32_t query)
+sqtt_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, VkQueryPool queryPool,
+ uint32_t query)
{
API_MARKER_ALIAS(WriteTimestamp2, WriteTimestamp, commandBuffer, stage, queryPool, query);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout,
- VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size,
- const void *pValues)
+sqtt_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags,
+ uint32_t offset, uint32_t size, const void *pValues)
{
API_MARKER(PushConstants, commandBuffer, layout, stageFlags, offset, size, pValues);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdBeginRendering(VkCommandBuffer commandBuffer,
- const VkRenderingInfo *pRenderingInfo)
+sqtt_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRenderingInfo)
{
API_MARKER_ALIAS(BeginRendering, BeginRenderPass, commandBuffer, pRenderingInfo);
}
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount,
- const VkCommandBuffer *pCmdBuffers)
+sqtt_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, const VkCommandBuffer *pCmdBuffers)
{
API_MARKER(ExecuteCommands, commandBuffer, commandBufferCount, pCmdBuffers);
}
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor,
- float depthBiasClamp, float depthBiasSlopeFactor)
+sqtt_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, float depthBiasClamp,
+ float depthBiasSlopeFactor)
{
- API_MARKER(SetDepthBias, commandBuffer, depthBiasConstantFactor, depthBiasClamp,
- depthBiasSlopeFactor);
+ API_MARKER(SetDepthBias, commandBuffer, depthBiasConstantFactor, depthBiasClamp, depthBiasSlopeFactor);
}
VKAPI_ATTR void VKAPI_CALL
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
- uint32_t compareMask)
+sqtt_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t compareMask)
{
API_MARKER(SetStencilCompareMask, commandBuffer, faceMask, compareMask);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
- uint32_t writeMask)
+sqtt_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t writeMask)
{
API_MARKER(SetStencilWriteMask, commandBuffer, faceMask, writeMask);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
- uint32_t reference)
+sqtt_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t reference)
{
API_MARKER(SetStencilReference, commandBuffer, faceMask, reference);
}
/* VK_EXT_debug_marker */
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdDebugMarkerBeginEXT(VkCommandBuffer commandBuffer,
- const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
+sqtt_CmdDebugMarkerBeginEXT(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
radv_write_user_event_marker(cmd_buffer, UserEventPush, pMarkerInfo->pMarkerName);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdDebugMarkerInsertEXT(VkCommandBuffer commandBuffer,
- const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
+sqtt_CmdDebugMarkerInsertEXT(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pMarkerInfo->pMarkerName);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer commandBuffer,
- const VkDebugUtilsLabelEXT *pLabelInfo)
+sqtt_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT *pLabelInfo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
radv_write_user_event_marker(cmd_buffer, UserEventPush, pLabelInfo->pLabelName);
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_CmdInsertDebugUtilsLabelEXT(VkCommandBuffer commandBuffer,
- const VkDebugUtilsLabelEXT *pLabelInfo)
+sqtt_CmdInsertDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT *pLabelInfo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pLabelInfo->pLabelName);
/* Destroy the PSO correlation record. */
simple_mtx_lock(&pso_correlation->lock);
- list_for_each_entry_safe(struct rgp_pso_correlation_record, record, &pso_correlation->record,
- list)
- {
+ list_for_each_entry_safe (struct rgp_pso_correlation_record, record, &pso_correlation->record, list) {
if (record->pipeline_hash[0] == pipeline->pipeline_hash) {
pso_correlation->record_count--;
list_del(&record->list);
/* Destroy the code object loader record. */
simple_mtx_lock(&loader_events->lock);
- list_for_each_entry_safe(struct rgp_loader_events_record, record, &loader_events->record, list)
- {
+ list_for_each_entry_safe (struct rgp_loader_events_record, record, &loader_events->record, list) {
if (record->code_object_hash[0] == pipeline->pipeline_hash) {
loader_events->record_count--;
list_del(&record->list);
/* Destroy the code object record. */
simple_mtx_lock(&code_object->lock);
- list_for_each_entry_safe(struct rgp_code_object_record, record, &code_object->record, list)
- {
+ list_for_each_entry_safe (struct rgp_code_object_record, record, &code_object->record, list) {
if (record->pipeline_hash[0] == pipeline->pipeline_hash) {
code_object->record_count--;
list_del(&record->list);
VKAPI_ATTR VkResult VKAPI_CALL
sqtt_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
- const VkGraphicsPipelineCreateInfo *pCreateInfos,
- const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
+ const VkGraphicsPipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
+ VkPipeline *pPipelines)
{
RADV_FROM_HANDLE(radv_device, device, _device);
VkResult result;
- result = device->layer_dispatch.rgp.CreateGraphicsPipelines(
- _device, pipelineCache, count, pCreateInfos, pAllocator, pPipelines);
+ result = device->layer_dispatch.rgp.CreateGraphicsPipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,
+ pPipelines);
if (result != VK_SUCCESS)
return result;
VKAPI_ATTR VkResult VKAPI_CALL
sqtt_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
- const VkComputePipelineCreateInfo *pCreateInfos,
- const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
+ const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
+ VkPipeline *pPipelines)
{
RADV_FROM_HANDLE(radv_device, device, _device);
VkResult result;
- result = device->layer_dispatch.rgp.CreateComputePipelines(_device, pipelineCache, count,
- pCreateInfos, pAllocator, pPipelines);
+ result = device->layer_dispatch.rgp.CreateComputePipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,
+ pPipelines);
if (result != VK_SUCCESS)
return result;
RADV_FROM_HANDLE(radv_device, device, _device);
VkResult result;
- result = device->layer_dispatch.rgp.CreateRayTracingPipelinesKHR(
- _device, deferredOperation, pipelineCache, count, pCreateInfos, pAllocator, pPipelines);
+ result = device->layer_dispatch.rgp.CreateRayTracingPipelinesKHR(_device, deferredOperation, pipelineCache, count,
+ pCreateInfos, pAllocator, pPipelines);
if (result != VK_SUCCESS)
return result;
}
VKAPI_ATTR void VKAPI_CALL
-sqtt_DestroyPipeline(VkDevice _device, VkPipeline _pipeline,
- const VkAllocationCallbacks *pAllocator)
+sqtt_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, const VkAllocationCallbacks *pAllocator)
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
}
void
-radv_meta_save(struct radv_meta_saved_state *state, struct radv_cmd_buffer *cmd_buffer,
- uint32_t flags)
+radv_meta_save(struct radv_meta_saved_state *state, struct radv_cmd_buffer *cmd_buffer, uint32_t flags)
{
- VkPipelineBindPoint bind_point = flags & RADV_META_SAVE_GRAPHICS_PIPELINE
- ? VK_PIPELINE_BIND_POINT_GRAPHICS
- : VK_PIPELINE_BIND_POINT_COMPUTE;
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
+ VkPipelineBindPoint bind_point =
+ flags & RADV_META_SAVE_GRAPHICS_PIPELINE ? VK_PIPELINE_BIND_POINT_GRAPHICS : VK_PIPELINE_BIND_POINT_COMPUTE;
+ struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point);
assert(flags & (RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_COMPUTE_PIPELINE));
void
radv_meta_restore(const struct radv_meta_saved_state *state, struct radv_cmd_buffer *cmd_buffer)
{
- VkPipelineBindPoint bind_point = state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE
- ? VK_PIPELINE_BIND_POINT_GRAPHICS
- : VK_PIPELINE_BIND_POINT_COMPUTE;
+ VkPipelineBindPoint bind_point = state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE ? VK_PIPELINE_BIND_POINT_GRAPHICS
+ : VK_PIPELINE_BIND_POINT_COMPUTE;
if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) {
if (state->old_graphics_pipeline) {
if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE)
stages |= VK_SHADER_STAGE_ALL_GRAPHICS;
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), VK_NULL_HANDLE, stages, 0,
- MAX_PUSH_CONSTANTS_SIZE, state->push_constants);
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), VK_NULL_HANDLE, stages, 0, MAX_PUSH_CONSTANTS_SIZE,
+ state->push_constants);
}
if (state->flags & RADV_META_SAVE_RENDER) {
* VkImageViewCreateInfo::subresourceRange::baseArrayLayer.
*/
uint32_t
-radv_meta_get_iview_layer(const struct radv_image *dst_image,
- const VkImageSubresourceLayers *dst_subresource,
+radv_meta_get_iview_layer(const struct radv_image *dst_image, const VkImageSubresourceLayers *dst_subresource,
const VkOffset3D *dst_offset)
{
switch (dst_image->vk.image_type) {
}
}
-static VKAPI_ATTR void * VKAPI_CALL
+static VKAPI_ATTR void *VKAPI_CALL
meta_alloc(void *_device, size_t size, size_t alignment, VkSystemAllocationScope allocationScope)
{
struct radv_device *device = _device;
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
}
-static VKAPI_ATTR void * VKAPI_CALL
-meta_realloc(void *_device, void *original, size_t size, size_t alignment,
- VkSystemAllocationScope allocationScope)
+static VKAPI_ATTR void *VKAPI_CALL
+meta_realloc(void *_device, void *original, size_t size, size_t alignment, VkSystemAllocationScope allocationScope)
{
struct radv_device *device = _device;
return device->vk.alloc.pfnReallocation(device->vk.alloc.pUserData, original, size, alignment,
create_info.pInitialData = data;
fail:
- result = vk_common_CreatePipelineCache(radv_device_to_handle(device), &create_info, NULL,
- &device->meta_state.cache);
+ result = vk_common_CreatePipelineCache(radv_device_to_handle(device), &create_info, NULL, &device->meta_state.cache);
if (result == VK_SUCCESS) {
device->meta_state.initial_cache_entries = num_cache_entries(device->meta_state.cache);
ret = device->meta_state.initial_cache_entries > 0;
if (num_cache_entries(device->meta_state.cache) <= device->meta_state.initial_cache_entries)
return;
- if (vk_common_GetPipelineCacheData(radv_device_to_handle(device), device->meta_state.cache,
- &size, NULL))
+ if (vk_common_GetPipelineCacheData(radv_device_to_handle(device), device->meta_state.cache, &size, NULL))
return;
if (!radv_builtin_cache_path(path))
if (!data)
goto fail;
- if (vk_common_GetPipelineCacheData(radv_device_to_handle(device), device->meta_state.cache,
- &size, data))
+ if (vk_common_GetPipelineCacheData(radv_device_to_handle(device), device->meta_state.cache, &size, data))
goto fail;
if (write(fd, data, size) == -1)
goto fail;
}
void
-radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b, bool is_integer,
- int samples, nir_variable *input_img, nir_variable *color,
- nir_ssa_def *img_coord)
+radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b, bool is_integer, int samples,
+ nir_variable *input_img, nir_variable *color, nir_ssa_def *img_coord)
{
nir_deref_instr *input_img_deref = nir_build_deref_var(b, input_img);
nir_ssa_def *sample0 = nir_txf_ms_deref(b, input_img_deref, img_coord, nir_imm_int(b, 0));
nir_ssa_def *
radv_meta_load_descriptor(nir_builder *b, unsigned desc_set, unsigned binding)
{
- nir_ssa_def *rsrc = nir_vulkan_resource_index(b, 3, 32, nir_imm_int(b, 0), .desc_set = desc_set,
- .binding = binding);
+ nir_ssa_def *rsrc = nir_vulkan_resource_index(b, 3, 32, nir_imm_int(b, 0), .desc_set = desc_set, .binding = binding);
return nir_trim_vector(b, rsrc, 2);
}
nir_ssa_def *local_ids = nir_channels(b, nir_load_local_invocation_id(b), mask);
nir_ssa_def *block_ids = nir_channels(b, nir_load_workgroup_id(b, 32), mask);
- nir_ssa_def *block_size = nir_channels(
- b,
- nir_imm_ivec4(b, b->shader->info.workgroup_size[0], b->shader->info.workgroup_size[1],
- b->shader->info.workgroup_size[2], 0),
- mask);
+ nir_ssa_def *block_size =
+ nir_channels(b,
+ nir_imm_ivec4(b, b->shader->info.workgroup_size[0], b->shader->info.workgroup_size[1],
+ b->shader->info.workgroup_size[2], 0),
+ mask);
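   /* Global invocation id = workgroup id * workgroup size + local id. */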
return nir_iadd(b, nir_imul(b, block_ids, block_size), local_ids);
}
VkResult radv_device_init_dgc_prepare_state(struct radv_device *device);
void radv_device_finish_dgc_prepare_state(struct radv_device *device);
-void radv_meta_save(struct radv_meta_saved_state *saved_state, struct radv_cmd_buffer *cmd_buffer,
- uint32_t flags);
+void radv_meta_save(struct radv_meta_saved_state *saved_state, struct radv_cmd_buffer *cmd_buffer, uint32_t flags);
-void radv_meta_restore(const struct radv_meta_saved_state *state,
- struct radv_cmd_buffer *cmd_buffer);
+void radv_meta_restore(const struct radv_meta_saved_state *state, struct radv_cmd_buffer *cmd_buffer);
VkImageViewType radv_meta_get_view_type(const struct radv_image *image);
-uint32_t radv_meta_get_iview_layer(const struct radv_image *dst_image,
- const VkImageSubresourceLayers *dst_subresource,
+uint32_t radv_meta_get_iview_layer(const struct radv_image *dst_image, const VkImageSubresourceLayers *dst_subresource,
const VkOffset3D *dst_offset);
struct radv_meta_blit2d_surf {
void radv_meta_begin_blit2d(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_saved_state *save);
void radv_meta_blit2d(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src_img,
- struct radv_meta_blit2d_buffer *src_buf, struct radv_meta_blit2d_surf *dst,
- unsigned num_rects, struct radv_meta_blit2d_rect *rects);
+ struct radv_meta_blit2d_buffer *src_buf, struct radv_meta_blit2d_surf *dst, unsigned num_rects,
+ struct radv_meta_blit2d_rect *rects);
void radv_meta_end_blit2d(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_saved_state *save);
VkResult radv_device_init_meta_bufimage_state(struct radv_device *device);
void radv_device_finish_meta_bufimage_state(struct radv_device *device);
-void radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src,
+void radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
struct radv_meta_blit2d_buffer *dst, unsigned num_rects,
struct radv_meta_blit2d_rect *rects);
-void radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_buffer *src,
+void radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_buffer *src,
struct radv_meta_blit2d_surf *dst, unsigned num_rects,
struct radv_meta_blit2d_rect *rects);
-void radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src,
+void radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
struct radv_meta_blit2d_surf *dst, unsigned num_rects,
struct radv_meta_blit2d_rect *rects);
void radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst,
void radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *subresourceRange,
struct radv_sample_locations_state *sample_locs);
-void radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
+void radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *subresourceRange);
void radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *subresourceRange);
void radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image);
void radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *subresourceRange);
-void radv_copy_vrs_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *vrs_image,
- const VkRect2D *rect, struct radv_image *dst_image,
- struct radv_buffer *htile_buffer, bool read_htile_value);
+void radv_copy_vrs_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *vrs_image, const VkRect2D *rect,
+ struct radv_image *dst_image, struct radv_buffer *htile_buffer, bool read_htile_value);
-bool radv_can_use_fmask_copy(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image *src_image, const struct radv_image *dst_image,
- unsigned num_rects, const struct radv_meta_blit2d_rect *rects);
+bool radv_can_use_fmask_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image,
+ const struct radv_image *dst_image, unsigned num_rects,
+ const struct radv_meta_blit2d_rect *rects);
void radv_fmask_copy(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
struct radv_meta_blit2d_surf *dst);
-void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image, VkFormat src_format,
- VkImageLayout src_image_layout, struct radv_image *dst_image,
+void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
+ VkFormat src_format, VkImageLayout src_image_layout, struct radv_image *dst_image,
VkFormat dst_format, VkImageLayout dst_image_layout,
const VkImageResolve2 *region);
-void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image, VkImageLayout src_image_layout,
- struct radv_image *dst_image,
- VkImageLayout dst_image_layout,
- const VkImageResolve2 *region);
+void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
+ VkImageLayout src_image_layout, struct radv_image *dst_image,
+ VkImageLayout dst_image_layout, const VkImageResolve2 *region);
void radv_decompress_resolve_rendering_src(struct radv_cmd_buffer *cmd_buffer);
uint32_t radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
const VkImageSubresourceRange *range, uint32_t value);
-void radv_update_buffer_cp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, const void *data,
- uint64_t size);
+void radv_update_buffer_cp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, const void *data, uint64_t size);
-void radv_meta_decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
- VkImageLayout layout, const VkImageSubresourceLayers *subresource,
- VkOffset3D offset, VkExtent3D extent);
+void radv_meta_decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout layout,
+ const VkImageSubresourceLayers *subresource, VkOffset3D offset, VkExtent3D extent);
/**
* Return whether the bound pipeline is the FMASK decompress pass.
return false;
return pipeline->custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX8 ||
- (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11 &&
- pipeline->custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX11);
+ (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11 &&
+ pipeline->custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX11);
}
/* common nir builder helpers */
nir_shader *radv_meta_build_nir_vs_generate_vertices(struct radv_device *dev);
nir_shader *radv_meta_build_nir_fs_noop(struct radv_device *dev);
-void radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b, bool is_integer,
- int samples, nir_variable *input_img, nir_variable *color,
- nir_ssa_def *img_coord);
+void radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b, bool is_integer, int samples,
+ nir_variable *input_img, nir_variable *color, nir_ssa_def *img_coord);
nir_ssa_def *radv_meta_load_descriptor(nir_builder *b, unsigned desc_set, unsigned binding);
VkExtent3D dst_extent;
};
-static VkResult build_pipeline(struct radv_device *device, VkImageAspectFlagBits aspect,
- enum glsl_sampler_dim tex_dim, VkFormat format,
- VkPipeline *pipeline);
+static VkResult build_pipeline(struct radv_device *device, VkImageAspectFlagBits aspect, enum glsl_sampler_dim tex_dim,
+ VkFormat format, VkPipeline *pipeline);
static nir_shader *
build_nir_vertex_shader(struct radv_device *dev)
nir_store_var(&b, pos_out, outvec, 0xf);
nir_ssa_def *src_box = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
- nir_ssa_def *src0_z =
- nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
+ nir_ssa_def *src0_z = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
nir_ssa_def *vertex_id = nir_load_vertex_id_zero_base(&b);
build_nir_copy_fragment_shader_depth(struct radv_device *dev, enum glsl_sampler_dim tex_dim)
{
const struct glsl_type *vec4 = glsl_vec4_type();
- nir_builder b =
- radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_blit_depth_fs.%d", tex_dim);
+ nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_blit_depth_fs.%d", tex_dim);
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec4, "v_tex_pos");
tex_pos_in->data.location = VARYING_SLOT_VAR0;
build_nir_copy_fragment_shader_stencil(struct radv_device *dev, enum glsl_sampler_dim tex_dim)
{
const struct glsl_type *vec4 = glsl_vec4_type();
- nir_builder b =
- radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_blit_stencil_fs.%d", tex_dim);
+ nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_blit_stencil_fs.%d", tex_dim);
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec4, "v_tex_pos");
tex_pos_in->data.location = VARYING_SLOT_VAR0;
}
static void
-meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
- struct radv_image_view *src_iview, VkImageLayout src_image_layout,
- float src_offset_0[3], float src_offset_1[3], struct radv_image *dst_image,
- struct radv_image_view *dst_iview, VkImageLayout dst_image_layout,
+meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, struct radv_image_view *src_iview,
+ VkImageLayout src_image_layout, float src_offset_0[3], float src_offset_1[3],
+ struct radv_image *dst_image, struct radv_image_view *dst_iview, VkImageLayout dst_image_layout,
VkRect2D dst_box, VkSampler sampler)
{
struct radv_device *device = cmd_buffer->device;
assert(src_image->vk.samples == dst_image->vk.samples);
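/* Map the source box into normalized [0,1] texel coordinates for the vertex shader. */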
float vertex_push_constants[5] = {
- src_offset_0[0] / (float)src_width, src_offset_0[1] / (float)src_height,
- src_offset_1[0] / (float)src_width, src_offset_1[1] / (float)src_height,
- src_offset_0[2] / (float)src_depth,
+ src_offset_0[0] / (float)src_width, src_offset_0[1] / (float)src_height, src_offset_1[0] / (float)src_width,
+ src_offset_1[1] / (float)src_height, src_offset_0[2] / (float)src_depth,
};
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.blit.pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
- vertex_push_constants);
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.blit.pipeline_layout,
+ VK_SHADER_STAGE_VERTEX_BIT, 0, 20, vertex_push_constants);
VkPipeline *pipeline = NULL;
unsigned fs_key = 0;
}
if (!*pipeline) {
- VkResult ret = build_pipeline(device, src_iview->vk.aspects,
- translate_sampler_dim(src_image->vk.image_type),
+ VkResult ret = build_pipeline(device, src_iview->vk.aspects, translate_sampler_dim(src_image->vk.image_type),
format, pipeline);
if (ret != VK_SUCCESS) {
vk_command_buffer_set_error(&cmd_buffer->vk, ret);
}
}
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
- *pipeline);
-
- radv_meta_push_descriptor_set(
- cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, device->meta_state.blit.pipeline_layout,
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .pImageInfo = (VkDescriptorImageInfo[]){
- {
- .sampler = sampler,
- .imageView = radv_image_view_to_handle(src_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }}});
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+
+ radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, device->meta_state.blit.pipeline_layout,
+ 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
+ .sampler = sampler,
+ .imageView = radv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
VkRenderingInfo rendering_info = {
.sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
- .renderArea = {
- .offset = { 0, 0 },
- .extent = { dst_width, dst_height },
- },
+ .renderArea =
+ {
+ .offset = {0, 0},
+ .extent = {dst_width, dst_height},
+ },
.layerCount = 1,
};
unsigned dst_layout = radv_meta_dst_layout_from_layout(dst_image_layout);
VkImageLayout layout = radv_meta_dst_layout_to_layout(dst_layout);
- color_att = (VkRenderingAttachmentInfo) {
+ color_att = (VkRenderingAttachmentInfo){
.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
.imageView = radv_image_view_to_handle(dst_iview),
.imageLayout = layout,
enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst_image_layout);
VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
- depth_att = (VkRenderingAttachmentInfo) {
+ depth_att = (VkRenderingAttachmentInfo){
.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
.imageView = radv_image_view_to_handle(dst_iview),
.imageLayout = layout,
enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst_image_layout);
VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
- stencil_att = (VkRenderingAttachmentInfo) {
+ stencil_att = (VkRenderingAttachmentInfo){
.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
.imageView = radv_image_view_to_handle(dst_iview),
.imageLayout = layout,
}
static void
-blit_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
- VkImageLayout src_image_layout, struct radv_image *dst_image,
- VkImageLayout dst_image_layout, const VkImageBlit2 *region, VkFilter filter)
+blit_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkImageLayout src_image_layout,
+ struct radv_image *dst_image, VkImageLayout dst_image_layout, const VkImageBlit2 *region, VkFilter filter)
{
const VkImageSubresourceLayers *src_res = &region->srcSubresource;
const VkImageSubresourceLayers *dst_res = &region->dstSubresource;
* affected by conditional rendering.
*/
radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS | RADV_META_SUSPEND_PREDICATING);
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS |
+ RADV_META_SUSPEND_PREDICATING);
unsigned dst_start, dst_end;
if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) {
.minDepth = 0.0f,
.maxDepth = 1.0f});
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
- &(VkRect2D){
- .offset = (VkOffset2D){MIN2(dst_offset_0.x, dst_offset_1.x),
- MIN2(dst_offset_0.y, dst_offset_1.y)},
- .extent = (VkExtent2D){abs(dst_offset_1.x - dst_offset_0.x),
- abs(dst_offset_1.y - dst_offset_0.y)},
- });
+ radv_CmdSetScissor(
+ radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkRect2D){
+ .offset = (VkOffset2D){MIN2(dst_offset_0.x, dst_offset_1.x), MIN2(dst_offset_0.y, dst_offset_1.y)},
+ .extent = (VkExtent2D){abs(dst_offset_1.x - dst_offset_0.x), abs(dst_offset_1.y - dst_offset_0.y)},
+ });
const unsigned num_layers = dst_end - dst_start;
for (unsigned i = 0; i < num_layers; i++) {
.layerCount = 1},
},
0, NULL);
- meta_emit_blit(cmd_buffer, src_image, &src_iview, src_image_layout, src_offset_0,
- src_offset_1, dst_image, &dst_iview, dst_image_layout, dst_box, sampler);
+ meta_emit_blit(cmd_buffer, src_image, &src_iview, src_image_layout, src_offset_0, src_offset_1, dst_image,
+ &dst_iview, dst_image_layout, dst_box, sampler);
radv_image_view_finish(&dst_iview);
radv_image_view_finish(&src_iview);
RADV_FROM_HANDLE(radv_image, dst_image, pBlitImageInfo->dstImage);
for (unsigned r = 0; r < pBlitImageInfo->regionCount; r++) {
- blit_image(cmd_buffer, src_image, pBlitImageInfo->srcImageLayout, dst_image,
- pBlitImageInfo->dstImageLayout, &pBlitImageInfo->pRegions[r],
- pBlitImageInfo->filter);
+ blit_image(cmd_buffer, src_image, pBlitImageInfo->srcImageLayout, dst_image, pBlitImageInfo->dstImageLayout,
+ &pBlitImageInfo->pRegions[r], pBlitImageInfo->filter);
}
}
struct radv_meta_state *state = &device->meta_state;
for (unsigned i = 0; i < NUM_META_FS_KEYS; ++i) {
- radv_DestroyPipeline(radv_device_to_handle(device), state->blit.pipeline_1d_src[i],
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device), state->blit.pipeline_2d_src[i],
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device), state->blit.pipeline_3d_src[i],
- &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.pipeline_1d_src[i], &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.pipeline_2d_src[i], &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.pipeline_3d_src[i], &state->alloc);
}
- radv_DestroyPipeline(radv_device_to_handle(device), state->blit.depth_only_1d_pipeline,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device), state->blit.depth_only_2d_pipeline,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device), state->blit.depth_only_3d_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device), state->blit.stencil_only_1d_pipeline,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device), state->blit.stencil_only_2d_pipeline,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device), state->blit.stencil_only_3d_pipeline,
- &state->alloc);
-
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->blit.pipeline_layout,
- &state->alloc);
- device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->blit.ds_layout, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.depth_only_1d_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.depth_only_2d_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.depth_only_3d_pipeline, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.stencil_only_1d_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.stencil_only_2d_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.stencil_only_3d_pipeline, &state->alloc);
+
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->blit.pipeline_layout, &state->alloc);
+ device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->blit.ds_layout,
+ &state->alloc);
}
static VkResult
-build_pipeline(struct radv_device *device, VkImageAspectFlagBits aspect,
- enum glsl_sampler_dim tex_dim, VkFormat format, VkPipeline *pipeline)
+build_pipeline(struct radv_device *device, VkImageAspectFlagBits aspect, enum glsl_sampler_dim tex_dim, VkFormat format,
+ VkPipeline *pipeline)
{
VkResult result = VK_SUCCESS;
.scissorCount = 1,
},
.pRasterizationState =
- &(VkPipelineRasterizationStateCreateInfo){
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
- .lineWidth = 1.0f},
+ &(VkPipelineRasterizationStateCreateInfo){.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ .lineWidth = 1.0f},
.pMultisampleState =
&(VkPipelineMultisampleStateCreateInfo){
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
VkPipelineColorBlendStateCreateInfo color_blend_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.attachmentCount = 1,
- .pAttachments = (VkPipelineColorBlendAttachmentState[]){
- {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT},
- },
- .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f }};
+ .pAttachments =
+ (VkPipelineColorBlendAttachmentState[]){
+ {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT},
+ },
+ .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}};
VkPipelineDepthStencilStateCreateInfo depth_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true};
- result = radv_graphics_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &vk_pipeline_info, &radv_pipeline_info,
- &device->meta_state.alloc, pipeline);
+ result = radv_graphics_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
+ &radv_pipeline_info, &device->meta_state.alloc, pipeline);
ralloc_free(vs);
ralloc_free(fs);
mtx_unlock(&device->meta_state.mtx);
if (on_demand)
return VK_SUCCESS;
- result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_1D,
- VK_FORMAT_D32_SFLOAT,
+ result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_1D, VK_FORMAT_D32_SFLOAT,
&device->meta_state.blit.depth_only_1d_pipeline);
if (result != VK_SUCCESS)
goto fail;
- result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_2D,
- VK_FORMAT_D32_SFLOAT,
+ result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_2D, VK_FORMAT_D32_SFLOAT,
&device->meta_state.blit.depth_only_2d_pipeline);
if (result != VK_SUCCESS)
goto fail;
- result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_3D,
- VK_FORMAT_D32_SFLOAT,
+ result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_3D, VK_FORMAT_D32_SFLOAT,
&device->meta_state.blit.depth_only_3d_pipeline);
if (result != VK_SUCCESS)
goto fail;
if (on_demand)
return VK_SUCCESS;
- result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_1D,
- VK_FORMAT_S8_UINT,
+ result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_1D, VK_FORMAT_S8_UINT,
&device->meta_state.blit.stencil_only_1d_pipeline);
if (result != VK_SUCCESS)
goto fail;
- result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_2D,
- VK_FORMAT_S8_UINT,
+ result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_2D, VK_FORMAT_S8_UINT,
&device->meta_state.blit.stencil_only_2d_pipeline);
if (result != VK_SUCCESS)
goto fail;
- result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_3D,
- VK_FORMAT_S8_UINT,
+ result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_3D, VK_FORMAT_S8_UINT,
&device->meta_state.blit.stencil_only_3d_pipeline);
if (result != VK_SUCCESS)
goto fail;
{
VkResult result;
- VkDescriptorSetLayoutCreateInfo ds_layout_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings = (VkDescriptorSetLayoutBinding[]){
- {.binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
- .pImmutableSamplers = NULL},
- }};
- result =
- radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info,
- &device->meta_state.alloc, &device->meta_state.blit.ds_layout);
+ VkDescriptorSetLayoutCreateInfo ds_layout_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info, &device->meta_state.alloc,
+ &device->meta_state.blit.ds_layout);
if (result != VK_SUCCESS)
return result;
.pushConstantRangeCount = 1,
.pPushConstantRanges = &push_constant_range,
},
- &device->meta_state.alloc,
- &device->meta_state.blit.pipeline_layout);
+ &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout);
if (result != VK_SUCCESS)
return result;
BLIT2D_NUM_SRC_TYPES,
};
-static VkResult blit2d_init_color_pipeline(struct radv_device *device,
- enum blit2d_src_type src_type, VkFormat format,
+static VkResult blit2d_init_color_pipeline(struct radv_device *device, enum blit2d_src_type src_type, VkFormat format,
uint32_t log2_samples);
-static VkResult blit2d_init_depth_only_pipeline(struct radv_device *device,
- enum blit2d_src_type src_type,
+static VkResult blit2d_init_depth_only_pipeline(struct radv_device *device, enum blit2d_src_type src_type,
uint32_t log2_samples);
-static VkResult blit2d_init_stencil_only_pipeline(struct radv_device *device,
- enum blit2d_src_type src_type,
+static VkResult blit2d_init_stencil_only_pipeline(struct radv_device *device, enum blit2d_src_type src_type,
uint32_t log2_samples);
static void
-create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf,
- struct radv_image_view *iview, VkFormat depth_format, VkImageAspectFlagBits aspects)
+create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf, struct radv_image_view *iview,
+ VkFormat depth_format, VkImageAspectFlagBits aspects)
{
VkFormat format;
.baseArrayLayer = surf->layer,
.layerCount = 1},
},
- 0, &(struct radv_image_view_extra_create_info){
- .disable_dcc_mrt = surf->disable_compression
- });
+ 0, &(struct radv_image_view_extra_create_info){.disable_dcc_mrt = surf->disable_compression});
}
static void
-create_bview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_buffer *src,
- struct radv_buffer_view *bview, VkFormat depth_format)
+create_bview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_buffer *src, struct radv_buffer_view *bview,
+ VkFormat depth_format)
{
VkFormat format;
static void
blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src_img,
- struct radv_meta_blit2d_buffer *src_buf, struct blit2d_src_temps *tmp,
- enum blit2d_src_type src_type, VkFormat depth_format, VkImageAspectFlagBits aspects,
- uint32_t log2_samples)
+ struct radv_meta_blit2d_buffer *src_buf, struct blit2d_src_temps *tmp, enum blit2d_src_type src_type,
+ VkFormat depth_format, VkImageAspectFlagBits aspects, uint32_t log2_samples)
{
struct radv_device *device = cmd_buffer->device;
create_bview(cmd_buffer, src_buf, &tmp->bview, depth_format);
radv_meta_push_descriptor_set(
- cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
- device->meta_state.blit2d[log2_samples].p_layouts[src_type], 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]){
- {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(&tmp->bview)}}});
+ cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, device->meta_state.blit2d[log2_samples].p_layouts[src_type],
+ 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(&tmp->bview)}}});
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.blit2d[log2_samples].p_layouts[src_type],
- VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4, &src_buf->pitch);
+ device->meta_state.blit2d[log2_samples].p_layouts[src_type], VK_SHADER_STAGE_FRAGMENT_BIT,
+ 16, 4, &src_buf->pitch);
} else {
create_iview(cmd_buffer, src_img, &tmp->iview, depth_format, aspects);
device->meta_state.blit2d[log2_samples].p_layouts[src_type],
VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4, &src_img->layer);
- radv_meta_push_descriptor_set(
- cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
- device->meta_state.blit2d[log2_samples].p_layouts[src_type], 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]){
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&tmp->iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }}});
+ radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ device->meta_state.blit2d[log2_samples].p_layouts[src_type], 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(&tmp->iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
}
}
};
static void
-bind_pipeline(struct radv_cmd_buffer *cmd_buffer, enum blit2d_src_type src_type, unsigned fs_key,
- uint32_t log2_samples)
+bind_pipeline(struct radv_cmd_buffer *cmd_buffer, enum blit2d_src_type src_type, unsigned fs_key, uint32_t log2_samples)
{
- VkPipeline pipeline =
- cmd_buffer->device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key];
+ VkPipeline pipeline = cmd_buffer->device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key];
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
- pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
}
static void
-bind_depth_pipeline(struct radv_cmd_buffer *cmd_buffer, enum blit2d_src_type src_type,
- uint32_t log2_samples)
+bind_depth_pipeline(struct radv_cmd_buffer *cmd_buffer, enum blit2d_src_type src_type, uint32_t log2_samples)
{
- VkPipeline pipeline =
- cmd_buffer->device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type];
+ VkPipeline pipeline = cmd_buffer->device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type];
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
- pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
}
static void
-bind_stencil_pipeline(struct radv_cmd_buffer *cmd_buffer, enum blit2d_src_type src_type,
- uint32_t log2_samples)
+bind_stencil_pipeline(struct radv_cmd_buffer *cmd_buffer, enum blit2d_src_type src_type, uint32_t log2_samples)
{
- VkPipeline pipeline =
- cmd_buffer->device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type];
+ VkPipeline pipeline = cmd_buffer->device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type];
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
- pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
}
static void
-radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src_img,
- struct radv_meta_blit2d_buffer *src_buf,
- struct radv_meta_blit2d_surf *dst, unsigned num_rects,
- struct radv_meta_blit2d_rect *rects, enum blit2d_src_type src_type,
+radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src_img,
+ struct radv_meta_blit2d_buffer *src_buf, struct radv_meta_blit2d_surf *dst,
+ unsigned num_rects, struct radv_meta_blit2d_rect *rects, enum blit2d_src_type src_type,
uint32_t log2_samples)
{
struct radv_device *device = cmd_buffer->device;
.extent = (VkExtent2D){rects[r].width, rects[r].height},
});
- u_foreach_bit(i, dst->aspect_mask)
- {
+ u_foreach_bit (i, dst->aspect_mask) {
unsigned aspect_mask = 1u << i;
unsigned src_aspect_mask = aspect_mask;
VkFormat depth_format = 0;
src_aspect_mask = src_img->aspect_mask;
struct blit2d_src_temps src_temps;
- blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format,
- src_aspect_mask, log2_samples);
+ blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format, src_aspect_mask,
+ log2_samples);
struct blit2d_dst_temps dst_temps;
create_iview(cmd_buffer, dst, &dst_temps.iview, depth_format, aspect_mask);
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.blit2d[log2_samples].p_layouts[src_type],
- VK_SHADER_STAGE_VERTEX_BIT, 0, 16, vertex_push_constants);
+ device->meta_state.blit2d[log2_samples].p_layouts[src_type], VK_SHADER_STAGE_VERTEX_BIT,
+ 0, 16, vertex_push_constants);
- if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT ||
- aspect_mask == VK_IMAGE_ASPECT_PLANE_0_BIT ||
- aspect_mask == VK_IMAGE_ASPECT_PLANE_1_BIT ||
- aspect_mask == VK_IMAGE_ASPECT_PLANE_2_BIT) {
+ if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT || aspect_mask == VK_IMAGE_ASPECT_PLANE_0_BIT ||
+ aspect_mask == VK_IMAGE_ASPECT_PLANE_1_BIT || aspect_mask == VK_IMAGE_ASPECT_PLANE_2_BIT) {
unsigned fs_key = radv_format_meta_fs_key(device, dst_temps.iview.vk.format);
- if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key] ==
- VK_NULL_HANDLE) {
- VkResult ret = blit2d_init_color_pipeline(
- device, src_type, radv_fs_key_format_exemplars[fs_key], log2_samples);
+ if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key] == VK_NULL_HANDLE) {
+ VkResult ret =
+ blit2d_init_color_pipeline(device, src_type, radv_fs_key_format_exemplars[fs_key], log2_samples);
if (ret != VK_SUCCESS) {
vk_command_buffer_set_error(&cmd_buffer->vk, ret);
goto fail_pipeline;
const VkRenderingInfo rendering_info = {
.sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
- .renderArea = {
- .offset = { rects[r].dst_x, rects[r].dst_y },
- .extent = { rects[r].width, rects[r].height },
- },
+ .renderArea =
+ {
+ .offset = {rects[r].dst_x, rects[r].dst_y},
+ .extent = {rects[r].width, rects[r].height},
+ },
.layerCount = 1,
.colorAttachmentCount = 1,
.pColorAttachments = &color_att_info,
bind_pipeline(cmd_buffer, src_type, fs_key, log2_samples);
} else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
- if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type] ==
- VK_NULL_HANDLE) {
+ if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type] == VK_NULL_HANDLE) {
VkResult ret = blit2d_init_depth_only_pipeline(device, src_type, log2_samples);
if (ret != VK_SUCCESS) {
vk_command_buffer_set_error(&cmd_buffer->vk, ret);
const VkRenderingInfo rendering_info = {
.sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
- .renderArea = {
- .offset = { rects[r].dst_x, rects[r].dst_y },
- .extent = { rects[r].width, rects[r].height },
- },
+ .renderArea =
+ {
+ .offset = {rects[r].dst_x, rects[r].dst_y},
+ .extent = {rects[r].width, rects[r].height},
+ },
.layerCount = 1,
.pDepthAttachment = &depth_att_info,
- .pStencilAttachment = (dst->image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ?
- &depth_att_info : NULL,
+ .pStencilAttachment = (dst->image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? &depth_att_info : NULL,
};
radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info);
bind_depth_pipeline(cmd_buffer, src_type, log2_samples);
} else if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
- if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type] ==
- VK_NULL_HANDLE) {
+ if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type] == VK_NULL_HANDLE) {
VkResult ret = blit2d_init_stencil_only_pipeline(device, src_type, log2_samples);
if (ret != VK_SUCCESS) {
vk_command_buffer_set_error(&cmd_buffer->vk, ret);
const VkRenderingInfo rendering_info = {
.sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
- .renderArea = {
- .offset = { rects[r].dst_x, rects[r].dst_y },
- .extent = { rects[r].width, rects[r].height },
- },
+ .renderArea =
+ {
+ .offset = {rects[r].dst_x, rects[r].dst_y},
+ .extent = {rects[r].width, rects[r].height},
+ },
.layerCount = 1,
- .pDepthAttachment = (dst->image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ?
- &stencil_att_info : NULL,
+ .pDepthAttachment = (dst->image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? &stencil_att_info : NULL,
.pStencilAttachment = &stencil_att_info,
};
void
radv_meta_blit2d(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src_img,
- struct radv_meta_blit2d_buffer *src_buf, struct radv_meta_blit2d_surf *dst,
- unsigned num_rects, struct radv_meta_blit2d_rect *rects)
+ struct radv_meta_blit2d_buffer *src_buf, struct radv_meta_blit2d_surf *dst, unsigned num_rects,
+ struct radv_meta_blit2d_rect *rects)
{
bool use_3d = (src_img && src_img->image->vk.image_type == VK_IMAGE_TYPE_3D);
enum blit2d_src_type src_type = src_buf ? BLIT2D_SRC_TYPE_BUFFER
return b.shader;
}
-typedef nir_ssa_def *(*texel_fetch_build_func)(struct nir_builder *, struct radv_device *,
- nir_ssa_def *, bool, bool);
+typedef nir_ssa_def *(*texel_fetch_build_func)(struct nir_builder *, struct radv_device *, nir_ssa_def *, bool, bool);
static nir_ssa_def *
-build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa_def *tex_pos,
- bool is_3d, bool is_multisampled)
+build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa_def *tex_pos, bool is_3d,
+ bool is_multisampled)
{
enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D
: is_multisampled ? GLSL_SAMPLER_DIM_MS
nir_ssa_def *tex_pos_3d = NULL;
nir_ssa_def *sample_idx = NULL;
if (is_3d) {
- nir_ssa_def *layer =
- nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
+ nir_ssa_def *layer = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
nir_ssa_def *chans[3];
chans[0] = nir_channel(b, tex_pos, 0);
}
static nir_ssa_def *
-build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa_def *tex_pos,
- bool is_3d, bool is_multisampled)
+build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa_def *tex_pos, bool is_3d,
+ bool is_multisampled)
{
- const struct glsl_type *sampler_type =
- glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
+ const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
sampler->data.descriptor_set = 0;
sampler->data.binding = 0;
};
static nir_shader *
-build_nir_copy_fragment_shader(struct radv_device *device, texel_fetch_build_func txf_func,
- const char *name, bool is_3d, bool is_multisampled)
+build_nir_copy_fragment_shader(struct radv_device *device, texel_fetch_build_func txf_func, const char *name,
+ bool is_3d, bool is_multisampled)
{
const struct glsl_type *vec4 = glsl_vec4_type();
const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
}
static nir_shader *
-build_nir_copy_fragment_shader_depth(struct radv_device *device, texel_fetch_build_func txf_func,
- const char *name, bool is_3d, bool is_multisampled)
+build_nir_copy_fragment_shader_depth(struct radv_device *device, texel_fetch_build_func txf_func, const char *name,
+ bool is_3d, bool is_multisampled)
{
const struct glsl_type *vec4 = glsl_vec4_type();
const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
}
static nir_shader *
-build_nir_copy_fragment_shader_stencil(struct radv_device *device, texel_fetch_build_func txf_func,
- const char *name, bool is_3d, bool is_multisampled)
+build_nir_copy_fragment_shader_stencil(struct radv_device *device, texel_fetch_build_func txf_func, const char *name,
+ bool is_3d, bool is_multisampled)
{
const struct glsl_type *vec4 = glsl_vec4_type();
const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
for (unsigned log2_samples = 0; log2_samples < MAX_SAMPLES_LOG2; ++log2_samples) {
for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) {
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->blit2d[log2_samples].p_layouts[src], &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->blit2d[log2_samples].p_layouts[src],
+ &state->alloc);
device->vk.dispatch_table.DestroyDescriptorSetLayout(
- radv_device_to_handle(device), state->blit2d[log2_samples].ds_layouts[src],
- &state->alloc);
+ radv_device_to_handle(device), state->blit2d[log2_samples].ds_layouts[src], &state->alloc);
for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit2d[log2_samples].pipelines[src][j], &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit2d[log2_samples].pipelines[src][j],
+ &state->alloc);
}
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit2d[log2_samples].depth_only_pipeline[src], &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit2d[log2_samples].stencil_only_pipeline[src],
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit2d[log2_samples].depth_only_pipeline[src],
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit2d[log2_samples].stencil_only_pipeline[src],
&state->alloc);
}
}
}
static VkResult
-blit2d_init_color_pipeline(struct radv_device *device, enum blit2d_src_type src_type,
- VkFormat format, uint32_t log2_samples)
+blit2d_init_color_pipeline(struct radv_device *device, enum blit2d_src_type src_type, VkFormat format,
+ uint32_t log2_samples)
{
VkResult result;
unsigned fs_key = radv_format_meta_fs_key(device, format);
}
const VkPipelineVertexInputStateCreateInfo *vi_create_info;
- nir_shader *fs = build_nir_copy_fragment_shader(
- device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0);
+ nir_shader *fs =
+ build_nir_copy_fragment_shader(device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0);
nir_shader *vs = build_nir_vertex_shader(device);
vi_create_info = &normal_vi_create_info;
.scissorCount = 1,
},
.pRasterizationState =
- &(VkPipelineRasterizationStateCreateInfo){
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
- .depthBiasConstantFactor = 0.0f,
- .depthBiasClamp = 0.0f,
- .depthBiasSlopeFactor = 0.0f,
- .lineWidth = 1.0f},
+ &(VkPipelineRasterizationStateCreateInfo){.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ .depthBiasConstantFactor = 0.0f,
+ .depthBiasClamp = 0.0f,
+ .depthBiasSlopeFactor = 0.0f,
+ .lineWidth = 1.0f},
.pMultisampleState =
&(VkPipelineMultisampleStateCreateInfo){
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.attachmentCount = 1,
.pAttachments =
(VkPipelineColorBlendAttachmentState[]){
- {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT},
+ {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT},
},
- .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f }},
+ .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}},
.pDynamicState =
&(VkPipelineDynamicStateCreateInfo){
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true};
- result = radv_graphics_pipeline_create(
- radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
- &radv_pipeline_info, &device->meta_state.alloc,
- &device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key]);
+ result = radv_graphics_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
+ &radv_pipeline_info, &device->meta_state.alloc,
+ &device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key]);
ralloc_free(vs);
ralloc_free(fs);
}
static VkResult
-blit2d_init_depth_only_pipeline(struct radv_device *device, enum blit2d_src_type src_type,
- uint32_t log2_samples)
+blit2d_init_depth_only_pipeline(struct radv_device *device, enum blit2d_src_type src_type, uint32_t log2_samples)
{
VkResult result;
const char *name;
}
const VkPipelineVertexInputStateCreateInfo *vi_create_info;
- nir_shader *fs = build_nir_copy_fragment_shader_depth(
- device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0);
+ nir_shader *fs = build_nir_copy_fragment_shader_depth(device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D,
+ log2_samples > 0);
nir_shader *vs = build_nir_vertex_shader(device);
vi_create_info = &normal_vi_create_info;
.scissorCount = 1,
},
.pRasterizationState =
- &(VkPipelineRasterizationStateCreateInfo){
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
- .depthBiasConstantFactor = 0.0f,
- .depthBiasClamp = 0.0f,
- .depthBiasSlopeFactor = 0.0f,
- .lineWidth = 1.0f},
+ &(VkPipelineRasterizationStateCreateInfo){.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ .depthBiasConstantFactor = 0.0f,
+ .depthBiasClamp = 0.0f,
+ .depthBiasSlopeFactor = 0.0f,
+ .lineWidth = 1.0f},
.pMultisampleState =
&(VkPipelineMultisampleStateCreateInfo){
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.attachmentCount = 0,
.pAttachments = NULL,
- .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f },
+ .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
},
.pDepthStencilState =
&(VkPipelineDepthStencilStateCreateInfo){
.depthTestEnable = true,
.depthWriteEnable = true,
.depthCompareOp = VK_COMPARE_OP_ALWAYS,
- .front = {
- .failOp = VK_STENCIL_OP_KEEP,
- .passOp = VK_STENCIL_OP_KEEP,
- .depthFailOp = VK_STENCIL_OP_KEEP,
- .compareOp = VK_COMPARE_OP_NEVER,
- .compareMask = UINT32_MAX,
- .writeMask = UINT32_MAX,
- .reference = 0u,
- },
- .back = {
- .failOp = VK_STENCIL_OP_KEEP,
- .passOp = VK_STENCIL_OP_KEEP,
- .depthFailOp = VK_STENCIL_OP_KEEP,
- .compareOp = VK_COMPARE_OP_NEVER,
- .compareMask = UINT32_MAX,
- .writeMask = UINT32_MAX,
- .reference = 0u,
- },
+ .front =
+ {
+ .failOp = VK_STENCIL_OP_KEEP,
+ .passOp = VK_STENCIL_OP_KEEP,
+ .depthFailOp = VK_STENCIL_OP_KEEP,
+ .compareOp = VK_COMPARE_OP_NEVER,
+ .compareMask = UINT32_MAX,
+ .writeMask = UINT32_MAX,
+ .reference = 0u,
+ },
+ .back =
+ {
+ .failOp = VK_STENCIL_OP_KEEP,
+ .passOp = VK_STENCIL_OP_KEEP,
+ .depthFailOp = VK_STENCIL_OP_KEEP,
+ .compareOp = VK_COMPARE_OP_NEVER,
+ .compareMask = UINT32_MAX,
+ .writeMask = UINT32_MAX,
+ .reference = 0u,
+ },
.minDepthBounds = 0.0f,
.maxDepthBounds = 1.0f,
},
const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true};
- result = radv_graphics_pipeline_create(
- radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
- &radv_pipeline_info, &device->meta_state.alloc,
- &device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type]);
+ result = radv_graphics_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
+ &radv_pipeline_info, &device->meta_state.alloc,
+ &device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type]);
ralloc_free(vs);
ralloc_free(fs);
}
static VkResult
-blit2d_init_stencil_only_pipeline(struct radv_device *device, enum blit2d_src_type src_type,
- uint32_t log2_samples)
+blit2d_init_stencil_only_pipeline(struct radv_device *device, enum blit2d_src_type src_type, uint32_t log2_samples)
{
VkResult result;
const char *name;
}
const VkPipelineVertexInputStateCreateInfo *vi_create_info;
- nir_shader *fs = build_nir_copy_fragment_shader_stencil(
- device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0);
+ nir_shader *fs = build_nir_copy_fragment_shader_stencil(device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D,
+ log2_samples > 0);
nir_shader *vs = build_nir_vertex_shader(device);
vi_create_info = &normal_vi_create_info;
.scissorCount = 1,
},
.pRasterizationState =
- &(VkPipelineRasterizationStateCreateInfo){
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
- .depthBiasConstantFactor = 0.0f,
- .depthBiasClamp = 0.0f,
- .depthBiasSlopeFactor = 0.0f,
- .lineWidth = 1.0f},
+ &(VkPipelineRasterizationStateCreateInfo){.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ .depthBiasConstantFactor = 0.0f,
+ .depthBiasClamp = 0.0f,
+ .depthBiasSlopeFactor = 0.0f,
+ .lineWidth = 1.0f},
.pMultisampleState =
&(VkPipelineMultisampleStateCreateInfo){
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.attachmentCount = 0,
.pAttachments = NULL,
- .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f },
+ .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
},
.pDepthStencilState =
&(VkPipelineDepthStencilStateCreateInfo){
const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true};
- result = radv_graphics_pipeline_create(
- radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
- &radv_pipeline_info, &device->meta_state.alloc,
- &device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type]);
+ result = radv_graphics_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
+ &radv_pipeline_info, &device->meta_state.alloc,
+ &device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type]);
ralloc_free(vs);
ralloc_free(fs);
meta_blit2d_create_pipe_layout(struct radv_device *device, int idx, uint32_t log2_samples)
{
VkResult result;
- VkDescriptorType desc_type = (idx == BLIT2D_SRC_TYPE_BUFFER)
- ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER
- : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
+ VkDescriptorType desc_type =
+ (idx == BLIT2D_SRC_TYPE_BUFFER) ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
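/* Buffer sources are read through a uniform texel buffer; image sources through a sampled image. */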
const VkPushConstantRange push_constant_ranges[] = {
{VK_SHADER_STAGE_VERTEX_BIT, 0, 16},
{VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4},
result = radv_CreateDescriptorSetLayout(
radv_device_to_handle(device),
- &(VkDescriptorSetLayoutCreateInfo){
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings =
- (VkDescriptorSetLayoutBinding[]){
- {.binding = 0,
- .descriptorType = desc_type,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
- .pImmutableSamplers = NULL},
- }},
+ &(VkDescriptorSetLayoutCreateInfo){.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings =
+ (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = desc_type,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .pImmutableSamplers = NULL},
+ }},
&device->meta_state.alloc, &device->meta_state.blit2d[log2_samples].ds_layouts[idx]);
if (result != VK_SUCCESS)
goto fail;
- result = radv_CreatePipelineLayout(
- radv_device_to_handle(device),
- &(VkPipelineLayoutCreateInfo){
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.blit2d[log2_samples].ds_layouts[idx],
- .pushConstantRangeCount = num_push_constant_range,
- .pPushConstantRanges = push_constant_ranges,
- },
- &device->meta_state.alloc, &device->meta_state.blit2d[log2_samples].p_layouts[idx]);
+ result =
+ radv_CreatePipelineLayout(radv_device_to_handle(device),
+ &(VkPipelineLayoutCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.blit2d[log2_samples].ds_layouts[idx],
+ .pushConstantRangeCount = num_push_constant_range,
+ .pPushConstantRanges = push_constant_ranges,
+ },
+ &device->meta_state.alloc, &device->meta_state.blit2d[log2_samples].p_layouts[idx]);
if (result != VK_SUCCESS)
goto fail;
return VK_SUCCESS;
continue;
for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
- result = blit2d_init_color_pipeline(device, src, radv_fs_key_format_exemplars[j],
- log2_samples);
+ result = blit2d_init_color_pipeline(device, src, radv_fs_key_format_exemplars[j], log2_samples);
if (result != VK_SUCCESS)
return result;
}
nir_ssa_def *size_minus16 = nir_channel(&b, pconst, 2);
nir_ssa_def *data = nir_swizzle(&b, nir_channel(&b, pconst, 3), (unsigned[]){0, 0, 0, 0}, 4);
- nir_ssa_def *global_id =
- nir_iadd(&b,
- nir_imul_imm(&b, nir_channel(&b, nir_load_workgroup_id(&b, 32), 0),
- b.shader->info.workgroup_size[0]),
- nir_load_local_invocation_index(&b));
+ nir_ssa_def *global_id = nir_iadd(
+ &b, nir_imul_imm(&b, nir_channel(&b, nir_load_workgroup_id(&b, 32), 0), b.shader->info.workgroup_size[0]),
+ nir_load_local_invocation_index(&b));
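/* Clamp the offset so the last 16-byte store ends exactly at the end of the buffer. */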
nir_ssa_def *offset = nir_imin(&b, nir_imul_imm(&b, global_id, 16), size_minus16);
nir_ssa_def *dst_addr = nir_iadd(&b, buffer_addr, nir_u2u64(&b, offset));
b.shader->info.workgroup_size[0] = 64;
nir_ssa_def *pconst = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
- nir_ssa_def *size_minus16 =
- nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
+ nir_ssa_def *size_minus16 = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
nir_ssa_def *src_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b0011));
nir_ssa_def *dst_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b1100));
- nir_ssa_def *global_id =
- nir_iadd(&b,
- nir_imul_imm(&b, nir_channel(&b, nir_load_workgroup_id(&b, 32), 0),
- b.shader->info.workgroup_size[0]),
- nir_load_local_invocation_index(&b));
+ nir_ssa_def *global_id = nir_iadd(
+ &b, nir_imul_imm(&b, nir_channel(&b, nir_load_workgroup_id(&b, 32), 0), b.shader->info.workgroup_size[0]),
+ nir_load_local_invocation_index(&b));
nir_ssa_def *offset = nir_u2u64(&b, nir_imin(&b, nir_imul_imm(&b, global_id, 16), size_minus16));
- nir_ssa_def *data =
- nir_build_load_global(&b, 4, 32, nir_iadd(&b, src_addr, offset), .align_mul = 4);
+ nir_ssa_def *data = nir_build_load_global(&b, 4, 32, nir_iadd(&b, src_addr, offset), .align_mul = 4);
nir_build_store_global(&b, data, nir_iadd(&b, dst_addr, offset), .align_mul = 4);
return b.shader;
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 0,
.pushConstantRangeCount = 1,
- .pPushConstantRanges =
- &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct fill_constants)},
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct fill_constants)},
};
- result = radv_CreatePipelineLayout(radv_device_to_handle(device), &fill_pl_create_info,
- &device->meta_state.alloc,
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &fill_pl_create_info, &device->meta_state.alloc,
&device->meta_state.buffer.fill_p_layout);
if (result != VK_SUCCESS)
goto fail;
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 0,
.pushConstantRangeCount = 1,
- .pPushConstantRanges =
- &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct copy_constants)},
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct copy_constants)},
};
- result = radv_CreatePipelineLayout(radv_device_to_handle(device), &copy_pl_create_info,
- &device->meta_state.alloc,
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &copy_pl_create_info, &device->meta_state.alloc,
&device->meta_state.buffer.copy_p_layout);
if (result != VK_SUCCESS)
goto fail;
};
result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &fill_vk_pipeline_info, NULL,
- &device->meta_state.buffer.fill_pipeline);
+ &fill_vk_pipeline_info, NULL, &device->meta_state.buffer.fill_pipeline);
if (result != VK_SUCCESS)
goto fail;
};
result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &copy_vk_pipeline_info, NULL,
- &device->meta_state.buffer.copy_pipeline);
+ &copy_vk_pipeline_info, NULL, &device->meta_state.buffer.copy_pipeline);
if (result != VK_SUCCESS)
goto fail;
radv_DestroyPipeline(radv_device_to_handle(device), state->buffer.copy_pipeline, &state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device), state->buffer.fill_pipeline, &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->buffer.copy_p_layout,
- &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->buffer.fill_p_layout,
- &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->buffer.copy_p_layout, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->buffer.fill_p_layout, &state->alloc);
}
static void
struct radv_device *device = cmd_buffer->device;
struct radv_meta_saved_state saved_state;
- radv_meta_save(
- &saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.buffer.fill_pipeline);
.data = data,
};
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.buffer.fill_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
- sizeof(fill_consts), &fill_consts);
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.buffer.fill_p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(fill_consts), &fill_consts);
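/* One invocation per 16 bytes of the fill region. */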
radv_unaligned_dispatch(cmd_buffer, DIV_ROUND_UP(size, 16), 1, 1);
}
static void
-copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dst_va,
- uint64_t size)
+copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dst_va, uint64_t size)
{
struct radv_device *device = cmd_buffer->device;
struct radv_meta_saved_state saved_state;
- radv_meta_save(
- &saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.buffer.copy_pipeline);
.size_minus16 = size - 16,
};
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.buffer.copy_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
- sizeof(copy_consts), &copy_consts);
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.buffer.copy_p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(copy_consts), &copy_consts);
radv_unaligned_dispatch(cmd_buffer, DIV_ROUND_UP(size, 16), 1, 1);
}
static bool
-radv_prefer_compute_dma(const struct radv_device *device, uint64_t size,
- struct radeon_winsys_bo *src_bo, struct radeon_winsys_bo *dst_bo)
+radv_prefer_compute_dma(const struct radv_device *device, uint64_t size, struct radeon_winsys_bo *src_bo,
+ struct radeon_winsys_bo *dst_bo)
{
bool use_compute = size >= RADV_BUFFER_OPS_CS_THRESHOLD;
- if (device->physical_device->rad_info.gfx_level >= GFX10 &&
- device->physical_device->rad_info.has_dedicated_vram) {
+ if (device->physical_device->rad_info.gfx_level >= GFX10 && device->physical_device->rad_info.has_dedicated_vram) {
if ((src_bo && !(src_bo->initial_domain & RADEON_DOMAIN_VRAM)) ||
(dst_bo && !(dst_bo->initial_domain & RADEON_DOMAIN_VRAM))) {
/* Prefer CP DMA for GTT on dGPUs due to slow PCIe. */
}
uint32_t
-radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
- struct radeon_winsys_bo *bo, uint64_t va, uint64_t size, uint32_t value)
+radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, struct radeon_winsys_bo *bo,
+ uint64_t va, uint64_t size, uint32_t value)
{
bool use_compute = radv_prefer_compute_dma(cmd_buffer->device, size, NULL, bo);
uint32_t flush_bits = 0;
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
if (use_compute) {
- cmd_buffer->state.flush_bits |=
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
+ cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
fill_buffer_shader(cmd_buffer, va, size, value);
}
void
-radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *src_bo,
- struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset,
- uint64_t size)
+radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *src_bo, struct radeon_winsys_bo *dst_bo,
+ uint64_t src_offset, uint64_t dst_offset, uint64_t size)
{
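/* The compute copy path works in dwords, so size and offsets must be 4-byte aligned. */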
bool use_compute = !(size & 3) && !(src_offset & 3) && !(dst_offset & 3) &&
radv_prefer_compute_dma(cmd_buffer->device, size, src_bo, dst_bo);
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset,
- VkDeviceSize fillSize, uint32_t data)
+radv_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize fillSize,
+ uint32_t data)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
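/* Resolve VK_WHOLE_SIZE and round the size down to a multiple of 4, as the spec requires. */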
fillSize = vk_buffer_range(&dst_buffer->vk, dstOffset, fillSize) & ~3ull;
radv_fill_buffer(cmd_buffer, NULL, dst_buffer->bo,
- radv_buffer_get_va(dst_buffer->bo) + dst_buffer->offset + dstOffset, fillSize,
- data);
+ radv_buffer_get_va(dst_buffer->bo) + dst_buffer->offset + dstOffset, fillSize, data);
}
static void
-copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *src_buffer,
- struct radv_buffer *dst_buffer, const VkBufferCopy2 *region)
+copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *src_buffer, struct radv_buffer *dst_buffer,
+ const VkBufferCopy2 *region)
{
bool old_predicating;
old_predicating = cmd_buffer->state.predicating;
cmd_buffer->state.predicating = false;
- radv_copy_buffer(cmd_buffer, src_buffer->bo, dst_buffer->bo,
- src_buffer->offset + region->srcOffset, dst_buffer->offset + region->dstOffset,
- region->size);
+ radv_copy_buffer(cmd_buffer, src_buffer->bo, dst_buffer->bo, src_buffer->offset + region->srcOffset,
+ dst_buffer->offset + region->dstOffset, region->size);
/* Restore conditional rendering. */
cmd_buffer->state.predicating = old_predicating;
}
void
-radv_update_buffer_cp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, const void *data,
- uint64_t size)
+radv_update_buffer_cp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, const void *data, uint64_t size)
{
uint64_t words = size / 4;
bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);
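/* WRITE_DATA needs 4 dwords of header/control/address plus the payload words. */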
radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
- radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ? V_370_MEM : V_370_MEM_GRBM) |
- S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
+ radeon_emit(cmd_buffer->cs,
+ S_370_DST_SEL(mec ? V_370_MEM : V_370_MEM_GRBM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
radeon_emit(cmd_buffer->cs, va);
radeon_emit(cmd_buffer->cs, va >> 32);
radeon_emit_array(cmd_buffer->cs, data, words);
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset,
- VkDeviceSize dataSize, const void *pData)
+radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize,
+ const void *pData)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
const struct glsl_type *sampler_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT);
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
- nir_builder b =
- radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
+ nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);
- nir_ssa_def *offset =
- nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
+ nir_ssa_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
- nir_ssa_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img),
- nir_trim_vector(&b, img_coord, 2 + is_3d), NULL);
+ nir_ssa_def *outval =
+ nir_txf_deref(&b, nir_build_deref_var(&b, input_img), nir_trim_vector(&b, img_coord, 2 + is_3d), NULL);
nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
nir_ssa_def *coord = nir_replicate(&b, tmp, 4);
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
- nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
- .image_dim = GLSL_SAMPLER_DIM_BUF);
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_ssa_undef(&b, 1, 32), outval,
+ nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
return b.shader;
}
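Taken together, the NIR built here is roughly the following kernel (a sketch, not the literal compiler output; the elided `tmp` is presumably the linear index pos_y * stride + pos_x):
/* layout(local_size_x = 8, local_size_y = 8) in;
 * p     = global_id + offset;           // push constants [0, 8), or [0, 12) for 3D
 * texel = texelFetch(s_tex, p, 0);      // sampled source image
 * imageStore(out_buf, pos_y * stride + pos_x, texel);  // storage texel buffer */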
* two descriptors: one for the image being sampled and
* one for the buffer being written.
*/
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]){
- {.binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- {.binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- }};
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
- &device->meta_state.alloc,
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
&device->meta_state.itob.img_ds_layout);
if (result != VK_SUCCESS)
goto fail;
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
};
- result =
- radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
- &device->meta_state.alloc, &device->meta_state.itob.img_p_layout);
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
+ &device->meta_state.itob.img_p_layout);
if (result != VK_SUCCESS)
goto fail;
.layout = device->meta_state.itob.img_p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &vk_pipeline_info, NULL, &device->meta_state.itob.pipeline);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
+ NULL, &device->meta_state.itob.pipeline);
if (result != VK_SUCCESS)
goto fail;
.layout = device->meta_state.itob.img_p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &vk_pipeline_info_3d, NULL,
- &device->meta_state.itob.pipeline_3d);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info_3d,
+ NULL, &device->meta_state.itob.pipeline_3d);
if (result != VK_SUCCESS)
goto fail;
{
struct radv_meta_state *state = &device->meta_state;
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itob.img_p_layout,
- &state->alloc);
- device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->itob.img_ds_layout, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itob.img_p_layout, &state->alloc);
+ device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itob.img_ds_layout,
+ &state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline, &state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline_3d, &state->alloc);
}
build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
{
enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
- const struct glsl_type *buf_type =
- glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
+ const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
- nir_builder b =
- radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
+ nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);
- nir_ssa_def *offset =
- nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
+ nir_ssa_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
nir_ssa_def *coord = nir_iadd(&b, global_id, offset);
nir_ssa_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), buf_coord, NULL);
- nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0),
- nir_channel(&b, coord, 1),
- is_3d ? nir_channel(&b, coord, 2) : nir_ssa_undef(&b, 1, 32),
- nir_ssa_undef(&b, 1, 32));
+ nir_ssa_def *img_coord =
+ nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1),
+ is_3d ? nir_channel(&b, coord, 2) : nir_ssa_undef(&b, 1, 32), nir_ssa_undef(&b, 1, 32));
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord,
- nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0), .image_dim = dim);
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32),
+ outval, nir_imm_int(&b, 0), .image_dim = dim);
return b.shader;
}
* two descriptors: one for the buffer being read and
* one for the image being written.
*/
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]){
- {.binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- {.binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- }};
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
- &device->meta_state.alloc,
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
&device->meta_state.btoi.img_ds_layout);
if (result != VK_SUCCESS)
goto fail;
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
};
- result =
- radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
- &device->meta_state.alloc, &device->meta_state.btoi.img_p_layout);
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
+ &device->meta_state.btoi.img_p_layout);
if (result != VK_SUCCESS)
goto fail;
.layout = device->meta_state.btoi.img_p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &vk_pipeline_info, NULL, &device->meta_state.btoi.pipeline);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
+ NULL, &device->meta_state.btoi.pipeline);
if (result != VK_SUCCESS)
goto fail;
.layout = device->meta_state.btoi.img_p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &vk_pipeline_info_3d, NULL,
- &device->meta_state.btoi.pipeline_3d);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info_3d,
+ NULL, &device->meta_state.btoi.pipeline_3d);
ralloc_free(cs_3d);
ralloc_free(cs);
{
struct radv_meta_state *state = &device->meta_state;
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi.img_p_layout,
- &state->alloc);
- device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->btoi.img_ds_layout, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi.img_p_layout, &state->alloc);
+ device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->btoi.img_ds_layout,
+ &state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline, &state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline_3d, &state->alloc);
}
static nir_shader *
build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
{
- const struct glsl_type *buf_type =
- glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
+ const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_btoi_r32g32b32_cs");
b.shader->info.workgroup_size[0] = 8;
nir_ssa_def *coord = nir_replicate(&b, local_pos, 4);
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
- nir_ssa_undef(&b, 1, 32), nir_channel(&b, outval, chan),
- nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_ssa_undef(&b, 1, 32),
+ nir_channel(&b, outval, chan), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
}
return b.shader;
VkResult result;
nir_shader *cs = build_nir_btoi_r32g32b32_compute_shader(device);
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]){
- {.binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- {.binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- }};
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
- &device->meta_state.alloc,
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
&device->meta_state.btoi_r32g32b32.img_ds_layout);
if (result != VK_SUCCESS)
goto fail;
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
};
- result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
- &device->meta_state.alloc,
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
&device->meta_state.btoi_r32g32b32.img_p_layout);
if (result != VK_SUCCESS)
goto fail;
.layout = device->meta_state.btoi_r32g32b32.img_p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &vk_pipeline_info, NULL,
- &device->meta_state.btoi_r32g32b32.pipeline);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
+ NULL, &device->meta_state.btoi_r32g32b32.pipeline);
fail:
ralloc_free(cs);
{
struct radv_meta_state *state = &device->meta_state;
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi_r32g32b32.img_p_layout,
- &state->alloc);
- device->vk.dispatch_table.DestroyDescriptorSetLayout(
- radv_device_to_handle(device), state->btoi_r32g32b32.img_ds_layout, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device), state->btoi_r32g32b32.pipeline,
- &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi_r32g32b32.img_p_layout, &state->alloc);
+ device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->btoi_r32g32b32.img_ds_layout, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->btoi_r32g32b32.pipeline, &state->alloc);
}
static nir_shader *
: GLSL_SAMPLER_DIM_2D;
const struct glsl_type *buf_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT);
const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
- nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE,
- is_3d ? "meta_itoi_cs_3d-%d" : "meta_itoi_cs-%d", samples);
+ nir_builder b =
+ radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_itoi_cs_3d-%d" : "meta_itoi_cs-%d", samples);
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);
- nir_ssa_def *src_offset =
- nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
+ nir_ssa_def *src_offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
nir_ssa_def *dst_offset =
nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 12), .range = is_3d ? 24 : 20);
nir_ssa_def *tex_vals[8];
if (is_multisampled) {
for (uint32_t i = 0; i < samples; i++) {
- tex_vals[i] = nir_txf_ms_deref(&b, input_img_deref, nir_trim_vector(&b, src_coord, 2),
- nir_imm_int(&b, i));
+ tex_vals[i] = nir_txf_ms_deref(&b, input_img_deref, nir_trim_vector(&b, src_coord, 2), nir_imm_int(&b, i));
}
} else {
- tex_vals[0] = nir_txf_deref(&b, input_img_deref, nir_trim_vector(&b, src_coord, 2 + is_3d),
- nir_imm_int(&b, 0));
+ tex_vals[0] = nir_txf_deref(&b, input_img_deref, nir_trim_vector(&b, src_coord, 2 + is_3d), nir_imm_int(&b, 0));
}
- nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0),
- nir_channel(&b, dst_coord, 1),
- is_3d ? nir_channel(&b, dst_coord, 2) : nir_ssa_undef(&b, 1, 32),
- nir_ssa_undef(&b, 1, 32));
+ nir_ssa_def *img_coord =
+ nir_vec4(&b, nir_channel(&b, dst_coord, 0), nir_channel(&b, dst_coord, 1),
+ is_3d ? nir_channel(&b, dst_coord, 2) : nir_ssa_undef(&b, 1, 32), nir_ssa_undef(&b, 1, 32));
for (uint32_t i = 0; i < samples; i++) {
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord,
- nir_imm_int(&b, i), tex_vals[i], nir_imm_int(&b, 0), .image_dim = dim);
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_imm_int(&b, i),
+ tex_vals[i], nir_imm_int(&b, 0), .image_dim = dim);
}
return b.shader;
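For multisampled sources the sample loop is unrolled at build time (one pipeline per sample count, as the `pipeline[samples_log2]` arrays elsewhere in this diff suggest), so a single invocation moves every sample of its texel; schematically (names illustrative):
/* for (i = 0; i < samples; i++) tex_vals[i] = texelFetch_ms(src, src_coord, i);
 * for (i = 0; i < samples; i++) imageStore_ms(dst, img_coord, i, tex_vals[i]); */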
.layout = state->itoi.img_p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), state->cache,
- &vk_pipeline_info, NULL, pipeline);
+ result =
+ radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, &vk_pipeline_info, NULL, pipeline);
ralloc_free(cs);
return result;
}
* two descriptors: one for the image being sampled and
* one for the image being written.
*/
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]){
- {.binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- {.binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- }};
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
- &device->meta_state.alloc,
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
&device->meta_state.itoi.img_ds_layout);
if (result != VK_SUCCESS)
goto fail;
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
};
- result =
- radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
- &device->meta_state.alloc, &device->meta_state.itoi.img_p_layout);
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
+ &device->meta_state.itoi.img_p_layout);
if (result != VK_SUCCESS)
goto fail;
.layout = device->meta_state.itoi.img_p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &vk_pipeline_info_3d, NULL,
- &device->meta_state.itoi.pipeline_3d);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info_3d,
+ NULL, &device->meta_state.itoi.pipeline_3d);
ralloc_free(cs_3d);
return VK_SUCCESS;
{
struct radv_meta_state *state = &device->meta_state;
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi.img_p_layout,
- &state->alloc);
- device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->itoi.img_ds_layout, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi.img_p_layout, &state->alloc);
+ device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itoi.img_ds_layout,
+ &state->alloc);
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline[i], &state->alloc);
static nir_shader *
build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)
{
- const struct glsl_type *type =
- glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
+ const struct glsl_type *type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_itoi_r32g32b32_cs");
b.shader->info.workgroup_size[0] = 8;
input_img->data.descriptor_set = 0;
input_img->data.binding = 0;
- nir_variable *output_img =
- nir_variable_create(b.shader, nir_var_image, img_type, "output_img");
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "output_img");
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_ssa_def *src_img_coord = nir_iadd(&b, global_id, src_offset);
nir_ssa_def *dst_img_coord = nir_iadd(&b, global_id, dst_offset);
- nir_ssa_def *src_global_pos =
- nir_iadd(&b, nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
- nir_imul_imm(&b, nir_channel(&b, src_img_coord, 0), 3));
+ nir_ssa_def *src_global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
+ nir_imul_imm(&b, nir_channel(&b, src_img_coord, 0), 3));
- nir_ssa_def *dst_global_pos =
- nir_iadd(&b, nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
- nir_imul_imm(&b, nir_channel(&b, dst_img_coord, 0), 3));
+ nir_ssa_def *dst_global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
+ nir_imul_imm(&b, nir_channel(&b, dst_img_coord, 0), 3));
for (int chan = 0; chan < 3; chan++) {
/* src */
nir_ssa_def *src_local_pos = nir_iadd_imm(&b, src_global_pos, chan);
- nir_ssa_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), src_local_pos,
- NULL);
+ nir_ssa_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), src_local_pos, NULL);
/* dst */
nir_ssa_def *dst_local_pos = nir_iadd_imm(&b, dst_global_pos, chan);
- nir_ssa_def *dst_coord =
- nir_replicate(&b, dst_local_pos, 4);
+ nir_ssa_def *dst_coord = nir_replicate(&b, dst_local_pos, 4);
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord,
- nir_ssa_undef(&b, 1, 32), nir_channel(&b, outval, 0),
- nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord, nir_ssa_undef(&b, 1, 32),
+ nir_channel(&b, outval, 0), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
}
return b.shader;
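The r32g32b32 helpers reinterpret each 12-byte texel as three consecutive R32 elements in a raw buffer (96-bit formats are generally not storage-capable), which is why both positions are scaled by 3. A quick worked example with illustrative numbers:
/* Image 100 texels wide, so src_stride = 300 R32 elements per row.
 * For texel (x = 2, y = 1):
 *   src_global_pos = 1 * 300 + 2 * 3 = 306
 * and the chan loop touches elements 306, 307 and 308. */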
VkResult result;
nir_shader *cs = build_nir_itoi_r32g32b32_compute_shader(device);
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]){
- {.binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- {.binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- }};
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
- &device->meta_state.alloc,
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
&device->meta_state.itoi_r32g32b32.img_ds_layout);
if (result != VK_SUCCESS)
goto fail;
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
};
- result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
- &device->meta_state.alloc,
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
&device->meta_state.itoi_r32g32b32.img_p_layout);
if (result != VK_SUCCESS)
goto fail;
.layout = device->meta_state.itoi_r32g32b32.img_p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &vk_pipeline_info, NULL,
- &device->meta_state.itoi_r32g32b32.pipeline);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
+ NULL, &device->meta_state.itoi_r32g32b32.pipeline);
fail:
ralloc_free(cs);
{
struct radv_meta_state *state = &device->meta_state;
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi_r32g32b32.img_p_layout,
- &state->alloc);
- device->vk.dispatch_table.DestroyDescriptorSetLayout(
- radv_device_to_handle(device), state->itoi_r32g32b32.img_ds_layout, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device), state->itoi_r32g32b32.pipeline,
- &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi_r32g32b32.img_p_layout, &state->alloc);
+ device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->itoi_r32g32b32.img_ds_layout, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->itoi_r32g32b32.pipeline, &state->alloc);
}
static nir_shader *
: is_multisampled ? GLSL_SAMPLER_DIM_MS
: GLSL_SAMPLER_DIM_2D;
const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
- nir_builder b = radv_meta_init_shader(
- dev, MESA_SHADER_COMPUTE, is_3d ? "meta_cleari_cs_3d-%d" : "meta_cleari_cs-%d", samples);
+ nir_builder b =
+ radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_cleari_cs_3d-%d" : "meta_cleari_cs-%d", samples);
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
global_id = nir_vec(&b, comps, 4);
for (uint32_t i = 0; i < samples; i++) {
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id,
- nir_imm_int(&b, i), clear_val, nir_imm_int(&b, 0), .image_dim = dim);
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id, nir_imm_int(&b, i),
+ clear_val, nir_imm_int(&b, 0), .image_dim = dim);
}
return b.shader;
.layout = device->meta_state.cleari.img_p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &vk_pipeline_info, NULL, pipeline);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
+ NULL, pipeline);
ralloc_free(cs);
return result;
}
* a single descriptor for the storage image
* being cleared.
*/
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings = (VkDescriptorSetLayoutBinding[]){
- {.binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- }};
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
- &device->meta_state.alloc,
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
&device->meta_state.cleari.img_ds_layout);
if (result != VK_SUCCESS)
goto fail;
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
};
- result =
- radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
- &device->meta_state.alloc, &device->meta_state.cleari.img_p_layout);
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
+ &device->meta_state.cleari.img_p_layout);
if (result != VK_SUCCESS)
goto fail;
.layout = device->meta_state.cleari.img_p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &vk_pipeline_info_3d, NULL,
- &device->meta_state.cleari.pipeline_3d);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info_3d,
+ NULL, &device->meta_state.cleari.pipeline_3d);
ralloc_free(cs_3d);
return VK_SUCCESS;
{
struct radv_meta_state *state = &device->meta_state;
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari.img_p_layout,
- &state->alloc);
- device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->cleari.img_ds_layout, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari.img_p_layout, &state->alloc);
+ device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->cleari.img_ds_layout,
+ &state->alloc);
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline[i], &state->alloc);
nir_ssa_def *global_x = nir_channel(&b, global_id, 0);
nir_ssa_def *global_y = nir_channel(&b, global_id, 1);
- nir_ssa_def *global_pos =
- nir_iadd(&b, nir_imul(&b, global_y, stride), nir_imul_imm(&b, global_x, 3));
+ nir_ssa_def *global_pos = nir_iadd(&b, nir_imul(&b, global_y, stride), nir_imul_imm(&b, global_x, 3));
for (unsigned chan = 0; chan < 3; chan++) {
nir_ssa_def *local_pos = nir_iadd_imm(&b, global_pos, chan);
nir_ssa_def *coord = nir_replicate(&b, local_pos, 4);
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
- nir_ssa_undef(&b, 1, 32), nir_channel(&b, clear_val, chan),
- nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_ssa_undef(&b, 1, 32),
+ nir_channel(&b, clear_val, chan), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
}
return b.shader;
VkResult result;
nir_shader *cs = build_nir_cleari_r32g32b32_compute_shader(device);
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings = (VkDescriptorSetLayoutBinding[]){
- {.binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- }};
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
- &device->meta_state.alloc,
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
&device->meta_state.cleari_r32g32b32.img_ds_layout);
if (result != VK_SUCCESS)
goto fail;
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
};
- result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
- &device->meta_state.alloc,
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
&device->meta_state.cleari_r32g32b32.img_p_layout);
if (result != VK_SUCCESS)
goto fail;
.layout = device->meta_state.cleari_r32g32b32.img_p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &vk_pipeline_info, NULL,
- &device->meta_state.cleari_r32g32b32.pipeline);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
+ NULL, &device->meta_state.cleari_r32g32b32.pipeline);
fail:
ralloc_free(cs);
{
struct radv_meta_state *state = &device->meta_state;
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari_r32g32b32.img_p_layout,
- &state->alloc);
- device->vk.dispatch_table.DestroyDescriptorSetLayout(
- radv_device_to_handle(device), state->cleari_r32g32b32.img_ds_layout, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device), state->cleari_r32g32b32.pipeline,
- &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari_r32g32b32.img_p_layout, &state->alloc);
+ device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->cleari_r32g32b32.img_ds_layout, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->cleari_r32g32b32.pipeline, &state->alloc);
}
void
}
static void
-create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf,
- struct radv_image_view *iview, VkFormat format, VkImageAspectFlagBits aspects)
+create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf, struct radv_image_view *iview,
+ VkFormat format, VkImageAspectFlagBits aspects)
{
if (format == VK_FORMAT_UNDEFINED)
format = surf->format;
.baseArrayLayer = surf->layer,
.layerCount = 1},
},
- 0, &(struct radv_image_view_extra_create_info){
+ 0,
+ &(struct radv_image_view_extra_create_info){
.disable_compression = surf->disable_compression,
});
}
static void
-create_bview(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, unsigned offset,
- VkFormat format, struct radv_buffer_view *bview)
+create_bview(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, unsigned offset, VkFormat format,
+ struct radv_buffer_view *bview)
{
radv_buffer_view_init(bview, cmd_buffer->device,
&(VkBufferViewCreateInfo){
}
static void
-create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer,
- unsigned offset, VkFormat src_format, struct radv_buffer_view *bview)
+create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, unsigned offset,
+ VkFormat src_format, struct radv_buffer_view *bview)
{
VkFormat format;
* image view descriptors instead.
*/
static void
-fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_meta_blit2d_buffer *buf_bsurf,
- const struct radv_meta_blit2d_surf *img_bsurf,
- const struct radv_meta_blit2d_rect *rect, bool to_image)
+fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_meta_blit2d_buffer *buf_bsurf,
+ const struct radv_meta_blit2d_surf *img_bsurf, const struct radv_meta_blit2d_rect *rect,
+ bool to_image)
{
const unsigned mip_level = img_bsurf->level;
const struct radv_image *image = img_bsurf->image;
struct ac_surf_info surf_info = radv_get_ac_surf_info(device, image);
/* GFX10+ handles 2D images with a different workaround, so skip them here */
- if (rad_info->gfx_level < GFX9 ||
- (rad_info->gfx_level >= GFX10 && image->vk.image_type == VK_IMAGE_TYPE_2D) ||
+ if (rad_info->gfx_level < GFX9 || (rad_info->gfx_level >= GFX10 && image->vk.image_type == VK_IMAGE_TYPE_2D) ||
image->vk.mip_levels == 1 || !vk_format_is_block_compressed(image->vk.format))
return;
/* The actual extent we want to copy */
VkExtent2D mip_extent = {rect->width, rect->height};
- VkOffset2D mip_offset = {to_image ? rect->dst_x : rect->src_x,
- to_image ? rect->dst_y : rect->src_y};
+ VkOffset2D mip_offset = {to_image ? rect->dst_x : rect->src_x, to_image ? rect->dst_y : rect->src_y};
if (hw_mip_extent.width >= mip_offset.x + mip_extent.width &&
hw_mip_extent.height >= mip_offset.y + mip_extent.height)
* while we're fixing them. If we're writing to an image, we do not need
* to wait because the compute shader cannot write to those texels
*/
- cmd_buffer->state.flush_bits |=
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE;
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE;
}
for (uint32_t y = 0; y < mip_extent.height; y++) {
uint32_t x = (coordY < hw_mip_extent.height) ? hw_mip_extent.width : 0;
for (; x < mip_extent.width; x++) {
uint32_t coordX = x + mip_offset.x;
- uint64_t addr = ac_surface_addr_from_coord(addrlib, rad_info, surf, &surf_info,
- mip_level, coordX, coordY, img_bsurf->layer,
- image->vk.image_type == VK_IMAGE_TYPE_3D);
+ uint64_t addr = ac_surface_addr_from_coord(addrlib, rad_info, surf, &surf_info, mip_level, coordX, coordY,
+ img_bsurf->layer, image->vk.image_type == VK_IMAGE_TYPE_3D);
struct radeon_winsys_bo *img_bo = image->bindings[0].bo;
struct radeon_winsys_bo *mem_bo = buf_bsurf->buffer->bo;
const uint64_t img_offset = image->bindings[0].offset + addr;
/* buf_bsurf->offset already includes the layer offset */
- const uint64_t mem_offset = buf_bsurf->buffer->offset +
- buf_bsurf->offset +
- y * buf_bsurf->pitch * surf->bpe +
- x * surf->bpe;
+ const uint64_t mem_offset =
+ buf_bsurf->buffer->offset + buf_bsurf->offset + y * buf_bsurf->pitch * surf->bpe + x * surf->bpe;
if (to_image) {
radv_copy_buffer(cmd_buffer, mem_bo, img_bo, mem_offset, img_offset, surf->bpe);
} else {
}
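To make the per-texel fixup loop concrete, the buffer-side addressing works out as follows (a sketch with illustrative numbers; bpe is bytes per element, i.e. per block for compressed formats):
/* E.g. bpe = 16, buf_bsurf->pitch = 64 elements per row, texel (x = 3, y = 2):
 *   mem_offset = buffer->offset + bsurf->offset + 2 * 64 * 16 + 3 * 16
 * img_offset comes from ac_surface_addr_from_coord() for the same (x, y),
 * so each radv_copy_buffer() call moves exactly one element. */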
static unsigned
-get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *surf)
+get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf)
{
unsigned stride;
}
static void
-itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src,
- struct radv_buffer_view *dst)
+itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src, struct radv_buffer_view *dst)
{
struct radv_device *device = cmd_buffer->device;
radv_meta_push_descriptor_set(
cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itob.img_p_layout, 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]){
- {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo =
- (VkDescriptorImageInfo[]){
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(src),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }},
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
- }});
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(src),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }},
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
+ }});
}
void
radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
- struct radv_meta_blit2d_buffer *dst, unsigned num_rects,
- struct radv_meta_blit2d_rect *rects)
+ struct radv_meta_blit2d_buffer *dst, unsigned num_rects, struct radv_meta_blit2d_rect *rects)
{
VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline;
struct radv_device *device = cmd_buffer->device;
if (src->image->vk.image_type == VK_IMAGE_TYPE_3D)
pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d;
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
- pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
for (unsigned r = 0; r < num_rects; ++r) {
unsigned push_constants[4] = {rects[r].src_x, rects[r].src_y, src->layer, dst->pitch};
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.itob.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
- 16, push_constants);
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.itob.img_p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
fixup_gfx9_cs_copy(cmd_buffer, dst, src, &rects[r], false);
struct radv_device *device = cmd_buffer->device;
radv_meta_push_descriptor_set(
- cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi_r32g32b32.img_p_layout,
- 0, /* set */
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi_r32g32b32.img_p_layout, 0, /* set */
2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]){
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
- }});
+ (VkWriteDescriptorSet[]){{
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
+ }});
}
static void
-radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_buffer *src,
+radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_buffer *src,
struct radv_meta_blit2d_surf *dst, unsigned num_rects,
struct radv_meta_blit2d_rect *rects)
{
create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &buffer);
create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
- create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), dst_offset, dst->format,
- &dst_view);
+ create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), dst_offset, dst->format, &dst_view);
btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
- pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
src->pitch,
};
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.btoi_r32g32b32.img_p_layout,
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.btoi_r32g32b32.img_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
}
static void
-btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
- struct radv_image_view *dst)
+btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src, struct radv_image_view *dst)
{
struct radv_device *device = cmd_buffer->device;
radv_meta_push_descriptor_set(
cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi.img_p_layout, 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]){
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
- },
- {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]){
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(dst),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }}});
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
+ },
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(dst),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
}
void
-radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_buffer *src, struct radv_meta_blit2d_surf *dst,
- unsigned num_rects, struct radv_meta_blit2d_rect *rects)
+radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_buffer *src,
+ struct radv_meta_blit2d_surf *dst, unsigned num_rects, struct radv_meta_blit2d_rect *rects)
{
VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline;
struct radv_device *device = cmd_buffer->device;
struct radv_buffer_view src_view;
struct radv_image_view dst_view;
- if (dst->image->vk.format == VK_FORMAT_R32G32B32_UINT ||
- dst->image->vk.format == VK_FORMAT_R32G32B32_SINT ||
+ if (dst->image->vk.format == VK_FORMAT_R32G32B32_UINT || dst->image->vk.format == VK_FORMAT_R32G32B32_SINT ||
dst->image->vk.format == VK_FORMAT_R32G32B32_SFLOAT) {
radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst, num_rects, rects);
return;
if (dst->image->vk.image_type == VK_IMAGE_TYPE_3D)
pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
- pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
for (unsigned r = 0; r < num_rects; ++r) {
unsigned push_constants[4] = {
dst->layer,
src->pitch,
};
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.btoi.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
- 16, push_constants);
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.btoi.img_p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
fixup_gfx9_cs_copy(cmd_buffer, src, dst, &rects[r], true);
struct radv_device *device = cmd_buffer->device;
radv_meta_push_descriptor_set(
- cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi_r32g32b32.img_p_layout,
- 0, /* set */
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi_r32g32b32.img_p_layout, 0, /* set */
2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]){
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
- }});
+ (VkWriteDescriptorSet[]){{
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
+ }});
}
static void
-radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src,
+radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
struct radv_meta_blit2d_surf *dst, unsigned num_rects,
struct radv_meta_blit2d_rect *rects)
{
create_buffer_from_image(cmd_buffer, src, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, &src_buffer);
create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &dst_buffer);
- create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer), src_offset,
- src->format, &src_view);
- create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer), dst_offset,
- dst->format, &dst_view);
+ create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer), src_offset, src->format, &src_view);
+ create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer), dst_offset, dst->format, &dst_view);
itoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
- pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
src_stride = get_image_stride_for_r32g32b32(cmd_buffer, src);
dst_stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
unsigned push_constants[6] = {
rects[r].src_x, rects[r].src_y, src_stride, rects[r].dst_x, rects[r].dst_y, dst_stride,
};
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.itoi_r32g32b32.img_p_layout,
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.itoi_r32g32b32.img_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 24, push_constants);
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
}
static void
-itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src,
- struct radv_image_view *dst)
+itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src, struct radv_image_view *dst)
{
struct radv_device *device = cmd_buffer->device;
- radv_meta_push_descriptor_set(
- cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi.img_p_layout, 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo =
- (VkDescriptorImageInfo[]){
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(src),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }},
- {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]){
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(dst),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }}});
+ radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi.img_p_layout,
+ 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(src),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(dst),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
}
void
radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
- struct radv_meta_blit2d_surf *dst, unsigned num_rects,
- struct radv_meta_blit2d_rect *rects)
+ struct radv_meta_blit2d_surf *dst, unsigned num_rects, struct radv_meta_blit2d_rect *rects)
{
struct radv_device *device = cmd_buffer->device;
struct radv_image_view src_view, dst_view;
return;
}
- u_foreach_bit(i, dst->aspect_mask) {
+ u_foreach_bit (i, dst->aspect_mask) {
unsigned aspect_mask = 1u << i;
VkFormat depth_format = 0;
if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline[samples_log2];
- if (src->image->vk.image_type == VK_IMAGE_TYPE_3D ||
- dst->image->vk.image_type == VK_IMAGE_TYPE_3D)
+ if (src->image->vk.image_type == VK_IMAGE_TYPE_3D || dst->image->vk.image_type == VK_IMAGE_TYPE_3D)
pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
- pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
for (unsigned r = 0; r < num_rects; ++r) {
unsigned push_constants[6] = {
rects[r].src_x, rects[r].src_y, src->layer, rects[r].dst_x, rects[r].dst_y, dst->layer,
};
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.itoi.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
- 24, push_constants);
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.itoi.img_p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, 24, push_constants);
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
}
{
struct radv_device *device = cmd_buffer->device;
- radv_meta_push_descriptor_set(
- cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.cleari_r32g32b32.img_p_layout,
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]){{
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(view)},
- }});
+ radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.cleari_r32g32b32.img_p_layout, 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(view)},
+ }});
}
static void
-radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *dst,
+radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst,
const VkClearColorValue *clear_color)
{
VkPipeline pipeline = cmd_buffer->device->meta_state.cleari_r32g32b32.pipeline;
*/
create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &buffer);
- create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), 0, dst->format,
- &dst_view);
+ create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), 0, dst->format, &dst_view);
cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view);
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
- pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
stride,
};
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.cleari_r32g32b32.img_p_layout,
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.cleari_r32g32b32.img_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
radv_unaligned_dispatch(cmd_buffer, dst->image->vk.extent.width, dst->image->vk.extent.height, 1);
{
struct radv_device *device = cmd_buffer->device;
- radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.cleari.img_p_layout, 0, /* set */
+ radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.cleari.img_p_layout,
+ 0, /* set */
1, /* descriptorWriteCount */
(VkWriteDescriptorSet[]){
{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
if (dst->image->vk.image_type == VK_IMAGE_TYPE_3D)
pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
- pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
unsigned push_constants[5] = {
- clear_color->uint32[0],
- clear_color->uint32[1],
- clear_color->uint32[2],
- clear_color->uint32[3],
- dst->layer,
+ clear_color->uint32[0], clear_color->uint32[1], clear_color->uint32[2], clear_color->uint32[3], dst->layer,
};
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.cleari.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
- push_constants);
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.cleari.img_p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, 20, push_constants);
radv_unaligned_dispatch(cmd_buffer, dst->image->vk.extent.width, dst->image->vk.extent.height, 1);
#include "radv_private.h"
#include "util/format_rgb9e5.h"
-#include "vk_format.h"
#include "vk_common_entrypoints.h"
+#include "vk_format.h"
enum { DEPTH_CLEAR_SLOW, DEPTH_CLEAR_FAST };
uint32_t frag_output)
{
nir_builder vs_b = radv_meta_init_shader(dev, MESA_SHADER_VERTEX, "meta_clear_color_vs");
- nir_builder fs_b =
- radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_clear_color_fs-%d", frag_output);
+ nir_builder fs_b = radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_clear_color_fs-%d", frag_output);
const struct glsl_type *position_type = glsl_vec4_type();
const struct glsl_type *color_type = glsl_vec4_type();
- nir_variable *vs_out_pos =
- nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, "gl_Position");
+ nir_variable *vs_out_pos = nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, "gl_Position");
vs_out_pos->data.location = VARYING_SLOT_POS;
- nir_ssa_def *in_color_load =
- nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range = 16);
+ nir_ssa_def *in_color_load = nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range = 16);
- nir_variable *fs_out_color =
- nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, "f_color");
+ nir_variable *fs_out_color = nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, "f_color");
fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;
nir_store_var(&fs_b, fs_out_color, in_color_load, 0xf);
nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
const struct glsl_type *layer_type = glsl_int_type();
- nir_variable *vs_out_layer =
- nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
+ nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
vs_out_layer->data.location = VARYING_SLOT_LAYER;
vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
}
static VkResult
-create_pipeline(struct radv_device *device, uint32_t samples,
- struct nir_shader *vs_nir, struct nir_shader *fs_nir,
+create_pipeline(struct radv_device *device, uint32_t samples, struct nir_shader *vs_nir, struct nir_shader *fs_nir,
const VkPipelineVertexInputStateCreateInfo *vi_state,
const VkPipelineDepthStencilStateCreateInfo *ds_state,
- const VkPipelineColorBlendStateCreateInfo *cb_state,
- const VkPipelineRenderingCreateInfo *dyn_state,
- const VkPipelineLayout layout,
- const struct radv_graphics_pipeline_create_info *extra,
+ const VkPipelineColorBlendStateCreateInfo *cb_state, const VkPipelineRenderingCreateInfo *dyn_state,
+ const VkPipelineLayout layout, const struct radv_graphics_pipeline_create_info *extra,
const VkAllocationCallbacks *alloc, VkPipeline *pipeline)
{
VkDevice device_h = radv_device_to_handle(device);
VkResult result;
- result = radv_graphics_pipeline_create(
- device_h, device->meta_state.cache,
- &(VkGraphicsPipelineCreateInfo){
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .pNext = dyn_state,
- .stageCount = fs_nir ? 2 : 1,
- .pStages =
- (VkPipelineShaderStageCreateInfo[]){
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vk_shader_module_handle_from_nir(vs_nir),
- .pName = "main",
- },
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = vk_shader_module_handle_from_nir(fs_nir),
- .pName = "main",
- },
- },
- .pVertexInputState = vi_state,
- .pInputAssemblyState =
- &(VkPipelineInputAssemblyStateCreateInfo){
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
- .primitiveRestartEnable = false,
- },
- .pViewportState =
- &(VkPipelineViewportStateCreateInfo){
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState =
- &(VkPipelineRasterizationStateCreateInfo){
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
- .depthBiasEnable = false,
- .depthBiasConstantFactor = 0.0f,
- .depthBiasClamp = 0.0f,
- .depthBiasSlopeFactor = 0.0f,
- .lineWidth = 1.0f,
- },
- .pMultisampleState =
- &(VkPipelineMultisampleStateCreateInfo){
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = samples,
- .sampleShadingEnable = false,
- .pSampleMask = NULL,
- .alphaToCoverageEnable = false,
- .alphaToOneEnable = false,
- },
- .pDepthStencilState = ds_state,
- .pColorBlendState = cb_state,
- .pDynamicState =
- &(VkPipelineDynamicStateCreateInfo){
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 3,
- .pDynamicStates =
- (VkDynamicState[]){
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- VK_DYNAMIC_STATE_STENCIL_REFERENCE,
- },
- },
- .layout = layout,
- .flags = 0,
- .renderPass = VK_NULL_HANDLE,
- .subpass = 0,
- },
- extra, alloc, pipeline);
+ result = radv_graphics_pipeline_create(device_h, device->meta_state.cache,
+ &(VkGraphicsPipelineCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .pNext = dyn_state,
+ .stageCount = fs_nir ? 2 : 1,
+ .pStages =
+ (VkPipelineShaderStageCreateInfo[]){
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vk_shader_module_handle_from_nir(vs_nir),
+ .pName = "main",
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = vk_shader_module_handle_from_nir(fs_nir),
+ .pName = "main",
+ },
+ },
+ .pVertexInputState = vi_state,
+ .pInputAssemblyState =
+ &(VkPipelineInputAssemblyStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState =
+ &(VkPipelineRasterizationStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ .depthBiasEnable = false,
+ .depthBiasConstantFactor = 0.0f,
+ .depthBiasClamp = 0.0f,
+ .depthBiasSlopeFactor = 0.0f,
+ .lineWidth = 1.0f,
+ },
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = samples,
+ .sampleShadingEnable = false,
+ .pSampleMask = NULL,
+ .alphaToCoverageEnable = false,
+ .alphaToOneEnable = false,
+ },
+ .pDepthStencilState = ds_state,
+ .pColorBlendState = cb_state,
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 3,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ },
+ },
+ .layout = layout,
+ .flags = 0,
+ .renderPass = VK_NULL_HANDLE,
+ .subpass = 0,
+ },
+ extra, alloc, pipeline);
ralloc_free(vs_nir);
ralloc_free(fs_nir);
}
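/* Note on the call above: when fs_nir is NULL, stageCount is 1, so only the
 * vertex entry of pStages is consumed; the fragment entry is initialized but
 * ignored by pipeline creation. */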
static VkResult
-create_color_pipeline(struct radv_device *device, uint32_t samples, uint32_t frag_output,
- VkFormat format, VkPipeline *pipeline)
+create_color_pipeline(struct radv_device *device, uint32_t samples, uint32_t frag_output, VkFormat format,
+ VkPipeline *pipeline)
{
struct nir_shader *vs_nir;
struct nir_shader *fs_nir;
VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = {0};
blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState){
.blendEnable = false,
- .colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT,
+ .colorWriteMask =
+ VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT,
};
const VkPipelineColorBlendStateCreateInfo cb_state = {
.logicOpEnable = false,
.attachmentCount = MAX_RTS,
.pAttachments = blend_attachment_state,
- .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f }};
+ .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}};
- VkFormat att_formats[MAX_RTS] = { 0 };
+ VkFormat att_formats[MAX_RTS] = {0};
att_formats[frag_output] = format;
const VkPipelineRenderingCreateInfo rendering_create_info = {
struct radv_graphics_pipeline_create_info extra = {
.use_rectlist = true,
};
- result =
- create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
- &rendering_create_info, device->meta_state.clear_color_p_layout,
- &extra, &device->meta_state.alloc, pipeline);
+ result = create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state, &rendering_create_info,
+ device->meta_state.clear_color_p_layout, &extra, &device->meta_state.alloc, pipeline);
mtx_unlock(&device->meta_state.mtx);
return result;
{
struct radv_meta_state *state = &device->meta_state;
- radv_DestroyPipeline(radv_device_to_handle(device), state->clear_htile_mask_pipeline,
- &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_htile_mask_p_layout,
- &state->alloc);
- device->vk.dispatch_table.DestroyDescriptorSetLayout(
- radv_device_to_handle(device), state->clear_htile_mask_ds_layout, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->clear_htile_mask_pipeline, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_htile_mask_p_layout, &state->alloc);
+ device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->clear_htile_mask_ds_layout, &state->alloc);
}
static void
struct radv_meta_state *state = &device->meta_state;
for (uint32_t i = 0; i < 2; i++) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->clear_dcc_comp_to_single_pipeline[i], &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->clear_dcc_comp_to_single_pipeline[i], &state->alloc);
}
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_dcc_comp_to_single_p_layout,
- &state->alloc);
- device->vk.dispatch_table.DestroyDescriptorSetLayout(
- radv_device_to_handle(device), state->clear_dcc_comp_to_single_ds_layout, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_dcc_comp_to_single_p_layout, &state->alloc);
+ device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->clear_dcc_comp_to_single_ds_layout, &state->alloc);
}
void
for (uint32_t i = 0; i < ARRAY_SIZE(state->color_clear); ++i) {
for (uint32_t j = 0; j < ARRAY_SIZE(state->color_clear[0]); ++j) {
for (uint32_t k = 0; k < ARRAY_SIZE(state->color_clear[i][j].color_pipelines); ++k) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->color_clear[i][j].color_pipelines[k], &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->color_clear[i][j].color_pipelines[k],
+ &state->alloc);
}
}
}
for (uint32_t i = 0; i < ARRAY_SIZE(state->ds_clear); ++i) {
for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->ds_clear[i].depth_only_pipeline[j], &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->ds_clear[i].stencil_only_pipeline[j], &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->ds_clear[i].depthstencil_pipeline[j], &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->ds_clear[i].depth_only_unrestricted_pipeline[j],
+ radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].depth_only_pipeline[j], &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].stencil_only_pipeline[j],
&state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->ds_clear[i].stencil_only_unrestricted_pipeline[j],
+ radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].depthstencil_pipeline[j],
&state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->ds_clear[i].depthstencil_unrestricted_pipeline[j],
+
+ radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].depth_only_unrestricted_pipeline[j],
&state->alloc);
- }
- }
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_color_p_layout,
+ radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].stencil_only_unrestricted_pipeline[j],
&state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_depth_p_layout,
+ radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].depthstencil_unrestricted_pipeline[j],
&state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->clear_depth_unrestricted_p_layout, &state->alloc);
+ }
+ }
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_color_p_layout, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_depth_p_layout, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_depth_unrestricted_p_layout, &state->alloc);
finish_meta_clear_htile_mask_state(device);
finish_meta_clear_dcc_comp_to_single_state(device);
}
static void
-emit_color_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,
- const VkClearRect *clear_rect, uint32_t view_mask)
+emit_color_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, const VkClearRect *clear_rect,
+ uint32_t view_mask)
{
struct radv_device *device = cmd_buffer->device;
const struct radv_rendering_state *render = &cmd_buffer->state.render;
assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
assert(clear_att->colorAttachment < render->color_att_count);
- const struct radv_attachment *color_att =
- &render->color_att[clear_att->colorAttachment];
+ const struct radv_attachment *color_att = &render->color_att[clear_att->colorAttachment];
/* When a framebuffer is bound to the current command buffer, get the
* number of samples from it. Otherwise, get the number of samples from
fs_key = radv_format_meta_fs_key(device, format);
assert(fs_key != -1);
- if (device->meta_state.color_clear[samples_log2][clear_att->colorAttachment]
- .color_pipelines[fs_key] == VK_NULL_HANDLE) {
+ if (device->meta_state.color_clear[samples_log2][clear_att->colorAttachment].color_pipelines[fs_key] ==
+ VK_NULL_HANDLE) {
VkResult ret = create_color_pipeline(
device, samples, clear_att->colorAttachment, radv_fs_key_format_exemplars[fs_key],
- &device->meta_state.color_clear[samples_log2][clear_att->colorAttachment]
- .color_pipelines[fs_key]);
+ &device->meta_state.color_clear[samples_log2][clear_att->colorAttachment].color_pipelines[fs_key]);
if (ret != VK_SUCCESS) {
vk_command_buffer_set_error(&cmd_buffer->vk, ret);
return;
}
}
- pipeline = device->meta_state.color_clear[samples_log2][clear_att->colorAttachment]
- .color_pipelines[fs_key];
+ pipeline = device->meta_state.color_clear[samples_log2][clear_att->colorAttachment].color_pipelines[fs_key];
assert(samples_log2 < ARRAY_SIZE(device->meta_state.color_clear));
assert(pipeline);
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.clear_color_p_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0,
- 16, &clear_value);
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.clear_color_p_layout,
+ VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16, &clear_value);
radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
if (view_mask) {
- u_foreach_bit(i, view_mask) radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
+ u_foreach_bit (i, view_mask)
+ radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
} else {
radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
}
}
static void
-build_depthstencil_shader(struct radv_device *dev, struct nir_shader **out_vs,
- struct nir_shader **out_fs, bool unrestricted)
+build_depthstencil_shader(struct radv_device *dev, struct nir_shader **out_vs, struct nir_shader **out_fs,
+ bool unrestricted)
{
nir_builder vs_b = radv_meta_init_shader(
- dev, MESA_SHADER_VERTEX,
- unrestricted ? "meta_clear_depthstencil_unrestricted_vs" : "meta_clear_depthstencil_vs");
- nir_builder fs_b = radv_meta_init_shader(
- dev, MESA_SHADER_FRAGMENT,
- unrestricted ? "meta_clear_depthstencil_unrestricted_fs" : "meta_clear_depthstencil_fs");
+ dev, MESA_SHADER_VERTEX, unrestricted ? "meta_clear_depthstencil_unrestricted_vs" : "meta_clear_depthstencil_vs");
+ nir_builder fs_b =
+ radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT,
+ unrestricted ? "meta_clear_depthstencil_unrestricted_fs" : "meta_clear_depthstencil_fs");
const struct glsl_type *position_out_type = glsl_vec4_type();
- nir_variable *vs_out_pos =
- nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type, "gl_Position");
+ nir_variable *vs_out_pos = nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type, "gl_Position");
vs_out_pos->data.location = VARYING_SLOT_POS;
nir_ssa_def *z;
if (unrestricted) {
- nir_ssa_def *in_color_load =
- nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range = 4);
+ nir_ssa_def *in_color_load = nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range = 4);
- nir_variable *fs_out_depth =
- nir_variable_create(fs_b.shader, nir_var_shader_out, glsl_int_type(), "f_depth");
+ nir_variable *fs_out_depth = nir_variable_create(fs_b.shader, nir_var_shader_out, glsl_int_type(), "f_depth");
fs_out_depth->data.location = FRAG_RESULT_DEPTH;
nir_store_var(&fs_b, fs_out_depth, in_color_load, 0x1);
nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
const struct glsl_type *layer_type = glsl_int_type();
- nir_variable *vs_out_layer =
- nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
+ nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
vs_out_layer->data.location = VARYING_SLOT_LAYER;
vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
}
static VkResult
-create_depthstencil_pipeline(struct radv_device *device, VkImageAspectFlags aspects,
- uint32_t samples, int index, bool unrestricted, VkPipeline *pipeline)
+create_depthstencil_pipeline(struct radv_device *device, VkImageAspectFlags aspects, uint32_t samples, int index,
+ bool unrestricted, VkPipeline *pipeline)
{
struct nir_shader *vs_nir, *fs_nir;
VkResult result;
.logicOpEnable = false,
.attachmentCount = 0,
.pAttachments = NULL,
- .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f },
+ .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
};
const VkPipelineRenderingCreateInfo rendering_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
- .depthAttachmentFormat =
- (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? VK_FORMAT_D32_SFLOAT : VK_FORMAT_UNDEFINED,
- .stencilAttachmentFormat =
- (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? VK_FORMAT_S8_UINT : VK_FORMAT_UNDEFINED,
+ .depthAttachmentFormat = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? VK_FORMAT_D32_SFLOAT : VK_FORMAT_UNDEFINED,
+ .stencilAttachmentFormat = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? VK_FORMAT_S8_UINT : VK_FORMAT_UNDEFINED,
};
struct radv_graphics_pipeline_create_info extra = {
if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
extra.db_stencil_clear = index != DEPTH_CLEAR_SLOW;
}
- result =
- create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
- &rendering_create_info, device->meta_state.clear_depth_p_layout, &extra,
- &device->meta_state.alloc, pipeline);
+ result = create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state, &rendering_create_info,
+ device->meta_state.clear_depth_p_layout, &extra, &device->meta_state.alloc, pipeline);
mtx_unlock(&device->meta_state.mtx);
return result;
}
-static bool
-radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
- VkImageLayout image_layout,
- VkImageAspectFlags aspects, const VkClearRect *clear_rect,
- const VkClearDepthStencilValue clear_value, uint32_t view_mask);
+static bool radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
+ VkImageLayout image_layout, VkImageAspectFlags aspects,
+ const VkClearRect *clear_rect, const VkClearDepthStencilValue clear_value,
+ uint32_t view_mask);
static VkPipeline
pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_state *meta_state,
- const struct radv_image_view *iview, int samples_log2,
- VkImageAspectFlags aspects, VkImageLayout layout,
- const VkClearRect *clear_rect, VkClearDepthStencilValue clear_value,
+ const struct radv_image_view *iview, int samples_log2, VkImageAspectFlags aspects,
+ VkImageLayout layout, const VkClearRect *clear_rect, VkClearDepthStencilValue clear_value,
uint32_t view_mask)
{
- bool fast = radv_can_fast_clear_depth(cmd_buffer, iview, layout, aspects, clear_rect,
- clear_value, view_mask);
+ bool fast = radv_can_fast_clear_depth(cmd_buffer, iview, layout, aspects, clear_rect, clear_value, view_mask);
bool unrestricted = cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted;
int index = fast ? DEPTH_CLEAR_FAST : DEPTH_CLEAR_SLOW;
VkPipeline *pipeline;
switch (aspects) {
case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
- pipeline = unrestricted
- ? &meta_state->ds_clear[samples_log2].depthstencil_unrestricted_pipeline[index]
- : &meta_state->ds_clear[samples_log2].depthstencil_pipeline[index];
+ pipeline = unrestricted ? &meta_state->ds_clear[samples_log2].depthstencil_unrestricted_pipeline[index]
+ : &meta_state->ds_clear[samples_log2].depthstencil_pipeline[index];
break;
case VK_IMAGE_ASPECT_DEPTH_BIT:
- pipeline = unrestricted
- ? &meta_state->ds_clear[samples_log2].depth_only_unrestricted_pipeline[index]
- : &meta_state->ds_clear[samples_log2].depth_only_pipeline[index];
+ pipeline = unrestricted ? &meta_state->ds_clear[samples_log2].depth_only_unrestricted_pipeline[index]
+ : &meta_state->ds_clear[samples_log2].depth_only_pipeline[index];
break;
case VK_IMAGE_ASPECT_STENCIL_BIT:
- pipeline = unrestricted
- ? &meta_state->ds_clear[samples_log2].stencil_only_unrestricted_pipeline[index]
- : &meta_state->ds_clear[samples_log2].stencil_only_pipeline[index];
+ pipeline = unrestricted ? &meta_state->ds_clear[samples_log2].stencil_only_unrestricted_pipeline[index]
+ : &meta_state->ds_clear[samples_log2].stencil_only_pipeline[index];
break;
default:
unreachable("expected depth or stencil aspect");
}
if (*pipeline == VK_NULL_HANDLE) {
- VkResult ret = create_depthstencil_pipeline(
- cmd_buffer->device, aspects, 1u << samples_log2, index, unrestricted, pipeline);
+ VkResult ret =
+ create_depthstencil_pipeline(cmd_buffer->device, aspects, 1u << samples_log2, index, unrestricted, pipeline);
if (ret != VK_SUCCESS) {
vk_command_buffer_set_error(&cmd_buffer->vk, ret);
return VK_NULL_HANDLE;
clear_value.depth = 1.0f;
if (cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted) {
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.clear_depth_unrestricted_p_layout,
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.clear_depth_unrestricted_p_layout,
VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4, &clear_value.depth);
} else {
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.clear_depth_p_layout, VK_SHADER_STAGE_VERTEX_BIT, 0,
- 4, &clear_value.depth);
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.clear_depth_p_layout,
+ VK_SHADER_STAGE_VERTEX_BIT, 0, 4, &clear_value.depth);
}
uint32_t prev_reference = cmd_buffer->state.dynamic.vk.ds.stencil.front.reference;
radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, clear_value.stencil);
}
- VkPipeline pipeline =
- pick_depthstencil_pipeline(cmd_buffer, meta_state, iview, samples_log2, aspects,
- render->ds_att.layout, clear_rect, clear_value, view_mask);
+ VkPipeline pipeline = pick_depthstencil_pipeline(cmd_buffer, meta_state, iview, samples_log2, aspects,
+ render->ds_att.layout, clear_rect, clear_value, view_mask);
if (!pipeline)
return;
radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
- if (radv_can_fast_clear_depth(cmd_buffer, iview, render->ds_att.layout, aspects,
- clear_rect, clear_value, view_mask))
+ if (radv_can_fast_clear_depth(cmd_buffer, iview, render->ds_att.layout, aspects, clear_rect, clear_value, view_mask))
radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);
radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
if (view_mask) {
- u_foreach_bit(i, view_mask) radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
+ u_foreach_bit (i, view_mask)
+ radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
} else {
radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
}
}
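/* Illustrative sketch, assuming only standard C plus a GCC/Clang builtin, of
 * what the u_foreach_bit draw loop above expands to: one rectlist draw per set
 * bit of view_mask, with the view index passed as firstInstance so the vertex
 * shader can forward it to VARYING_SLOT_LAYER. */
#include <stdint.h>

static void draw_per_view(uint32_t view_mask, void (*draw)(uint32_t vertex_count, uint32_t first_instance))
{
   while (view_mask) {
      const uint32_t view = __builtin_ctz(view_mask); /* index of the lowest set bit */
      draw(3, view);                                  /* 3 vertices, layer = view */
      view_mask &= view_mask - 1;                     /* clear that bit */
   }
}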
static uint32_t
-clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
- struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size, uint32_t htile_value,
- uint32_t htile_mask)
+clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, struct radeon_winsys_bo *bo,
+ uint64_t offset, uint64_t size, uint32_t htile_value, uint32_t htile_mask)
{
struct radv_device *device = cmd_buffer->device;
struct radv_meta_state *state = &device->meta_state;
struct radv_meta_saved_state saved_state;
struct radv_buffer dst_buffer;
- radv_meta_save(
- &saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
radv_buffer_init(&dst_buffer, device, bo, size, offset);
radv_meta_push_descriptor_set(
cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->clear_htile_mask_p_layout, 0, /* set */
- 1, /* descriptorWriteCount */
+ 1, /* descriptorWriteCount */
(VkWriteDescriptorSet[]){
{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer),
- .offset = 0,
- .range = size}}});
+ .pBufferInfo =
+ &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer), .offset = 0, .range = size}}});
const unsigned constants[2] = {
htile_value & htile_mask,
radv_meta_restore(&saved_state, cmd_buffer);
- return RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
+ return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
}
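/* Sketch of the per-dword update the masked clear above implies (an
 * assumption based on the value/mask pair pushed to the shader): bits outside
 * htile_mask are preserved, bits inside it are replaced. */
#include <stdint.h>

static inline uint32_t htile_masked_write(uint32_t old_htile, uint32_t htile_value, uint32_t htile_mask)
{
   return (old_htile & ~htile_mask) | (htile_value & htile_mask);
}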
static uint32_t
* +---------+---------+-------+
 * |  Max Z  |  Min Z  | ZMask |
*/
- htile_value = (((zmax & 0x3fff) << 18) |
- ((zmin & 0x3fff) << 4) |
- ((zmask & 0xf) << 0));
+ htile_value = (((zmax & 0x3fff) << 18) | ((zmin & 0x3fff) << 4) | ((zmask & 0xf) << 0));
} else {
/* Z and stencil:
if (radv_image_has_vrs_htile(device, image))
sresults = 0x3;
- htile_value = (((zrange & 0xfffff) << 12) |
- ((smem & 0x3) << 8) |
- ((sresults & 0xf) << 4) |
- ((zmask & 0xf) << 0));
+ htile_value = (((zrange & 0xfffff) << 12) | ((smem & 0x3) << 8) | ((sresults & 0xf) << 4) | ((zmask & 0xf) << 0));
}
return htile_value;
}
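/* Worked example of the Z-only packing above, as a standalone check: zmax and
 * zmin are 14-bit fields, zmask a 4-bit field. A fully expanded tile covering
 * the whole depth range packs as follows. */
#include <assert.h>
#include <stdint.h>

static uint32_t pack_htile_z_only(uint32_t zmax, uint32_t zmin, uint32_t zmask)
{
   return ((zmax & 0x3fff) << 18) | ((zmin & 0x3fff) << 4) | ((zmask & 0xf) << 0);
}

static void htile_pack_example(void)
{
   assert(pack_htile_z_only(0x3fff, 0x0000, 0x0) == 0xfffc0000u);
}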
static uint32_t
-radv_get_htile_mask(const struct radv_device *device, const struct radv_image *image,
- VkImageAspectFlags aspects)
+radv_get_htile_mask(const struct radv_device *device, const struct radv_image *image, VkImageAspectFlags aspects)
{
uint32_t mask = 0;
static bool
radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
- VkImageLayout image_layout,
- VkImageAspectFlags aspects, const VkClearRect *clear_rect,
+ VkImageLayout image_layout, VkImageAspectFlags aspects, const VkClearRect *clear_rect,
const VkClearDepthStencilValue clear_value, uint32_t view_mask)
{
if (!iview || !iview->support_fast_clear)
return false;
- if (!radv_layout_is_htile_compressed(
- cmd_buffer->device, iview->image, image_layout,
- radv_image_queue_family_mask(iview->image, cmd_buffer->qf,
- cmd_buffer->qf)))
+ if (!radv_layout_is_htile_compressed(cmd_buffer->device, iview->image, image_layout,
+ radv_image_queue_family_mask(iview->image, cmd_buffer->qf, cmd_buffer->qf)))
return false;
if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
clear_rect->rect.extent.height != iview->image->vk.extent.height)
return false;
- if (view_mask && (iview->image->vk.array_layers >= 32 ||
- (1u << iview->image->vk.array_layers) - 1u != view_mask))
+ if (view_mask && (iview->image->vk.array_layers >= 32 || (1u << iview->image->vk.array_layers) - 1u != view_mask))
return false;
if (!view_mask && clear_rect->baseArrayLayer != 0)
return false;
return false;
if (cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted &&
- (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
- (clear_value.depth < 0.0 || clear_value.depth > 1.0))
+ (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && (clear_value.depth < 0.0 || clear_value.depth > 1.0))
return false;
if (radv_image_is_tc_compat_htile(iview->image) &&
(((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && !radv_is_fast_clear_depth_allowed(clear_value)) ||
- ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
- !radv_is_fast_clear_stencil_allowed(clear_value))))
+ ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && !radv_is_fast_clear_stencil_allowed(clear_value))))
return false;
if (iview->image->vk.mip_levels > 1) {
if (pre_flush) {
enum radv_cmd_flush_bits bits =
- radv_src_access_flush(cmd_buffer, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
- iview->image) |
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT |
- VK_ACCESS_2_SHADER_READ_BIT, iview->image);
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, iview->image) |
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT | VK_ACCESS_2_SHADER_READ_BIT, iview->image);
cmd_buffer->state.flush_bits |= bits & ~*pre_flush;
*pre_flush |= cmd_buffer->state.flush_bits;
}
VkResult result;
nir_shader *cs = build_clear_htile_mask_shader(device);
- VkDescriptorSetLayoutCreateInfo ds_layout_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings = (VkDescriptorSetLayoutBinding[]){
- {.binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- }};
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info,
- &state->alloc, &state->clear_htile_mask_ds_layout);
+ VkDescriptorSetLayoutCreateInfo ds_layout_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info, &state->alloc,
+ &state->clear_htile_mask_ds_layout);
if (result != VK_SUCCESS)
goto fail;
.layout = state->clear_htile_mask_p_layout,
};
- result =
- radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, &pipeline_info,
- NULL, &state->clear_htile_mask_pipeline);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, &pipeline_info, NULL,
+ &state->clear_htile_mask_pipeline);
fail:
ralloc_free(cs);
enum glsl_sampler_dim dim = is_msaa ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
const struct glsl_type *img_type = glsl_image_type(dim, true, GLSL_TYPE_FLOAT);
- nir_builder b =
- radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_dcc_comp_to_single-%s",
- is_msaa ? "multisampled" : "singlesampled");
+ nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_dcc_comp_to_single-%s",
+ is_msaa ? "multisampled" : "singlesampled");
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
/* Compute the coordinates. */
nir_ssa_def *coord = nir_trim_vector(&b, global_id, 2);
coord = nir_imul(&b, coord, dcc_block_size);
- coord = nir_vec4(&b, nir_channel(&b, coord, 0),
- nir_channel(&b, coord, 1),
- nir_channel(&b, global_id, 2),
- nir_ssa_undef(&b, 1, 32));
+ coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), nir_channel(&b, global_id, 2),
+ nir_ssa_undef(&b, 1, 32));
nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
output_img->data.descriptor_set = 0;
/* Load the clear color values. */
nir_ssa_def *clear_values = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 8);
- nir_ssa_def *data = nir_vec4(&b, nir_channel(&b, clear_values, 0),
- nir_channel(&b, clear_values, 1),
- nir_channel(&b, clear_values, 1),
- nir_channel(&b, clear_values, 1));
+ nir_ssa_def *data = nir_vec4(&b, nir_channel(&b, clear_values, 0), nir_channel(&b, clear_values, 1),
+ nir_channel(&b, clear_values, 1), nir_channel(&b, clear_values, 1));
/* Store the clear color values. */
nir_ssa_def *sample_id = is_msaa ? nir_imm_int(&b, 0) : nir_ssa_undef(&b, 1, 32);
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
- sample_id, data, nir_imm_int(&b, 0),
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, sample_id, data, nir_imm_int(&b, 0),
.image_dim = dim, .image_array = true);
return b.shader;
.layout = state->clear_dcc_comp_to_single_p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), state->cache,
- &pipeline_info, NULL, pipeline);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, &pipeline_info, NULL, pipeline);
ralloc_free(cs);
return result;
struct radv_meta_state *state = &device->meta_state;
VkResult result;
- VkDescriptorSetLayoutCreateInfo ds_layout_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings = (VkDescriptorSetLayoutBinding[]){
- {.binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- }};
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info,
- &state->alloc, &state->clear_dcc_comp_to_single_ds_layout);
+ VkDescriptorSetLayoutCreateInfo ds_layout_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info, &state->alloc,
+ &state->clear_dcc_comp_to_single_ds_layout);
if (result != VK_SUCCESS)
goto fail;
goto fail;
for (uint32_t i = 0; i < 2; i++) {
- result = create_dcc_comp_to_single_pipeline(device, !!i,
- &state->clear_dcc_comp_to_single_pipeline[i]);
+ result = create_dcc_comp_to_single_pipeline(device, !!i, &state->clear_dcc_comp_to_single_pipeline[i]);
if (result != VK_SUCCESS)
goto fail;
}
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16},
};
- res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_color_create_info,
- &device->meta_state.alloc,
+ res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_color_create_info, &device->meta_state.alloc,
&device->meta_state.clear_color_p_layout);
if (res != VK_SUCCESS)
return res;
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_VERTEX_BIT, 0, 4},
};
- res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_depth_create_info,
- &device->meta_state.alloc,
+ res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_depth_create_info, &device->meta_state.alloc,
&device->meta_state.clear_depth_p_layout);
if (res != VK_SUCCESS)
return res;
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4},
};
- res = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_depth_unrestricted_create_info, &device->meta_state.alloc,
- &device->meta_state.clear_depth_unrestricted_p_layout);
+ res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_depth_unrestricted_create_info,
+ &device->meta_state.alloc, &device->meta_state.clear_depth_unrestricted_p_layout);
if (res != VK_SUCCESS)
return res;
unsigned fs_key = radv_format_meta_fs_key(device, format);
assert(!state->color_clear[i][0].color_pipelines[fs_key]);
- res = create_color_pipeline(device, samples, 0, format,
- &state->color_clear[i][0].color_pipelines[fs_key]);
+ res = create_color_pipeline(device, samples, 0, format, &state->color_clear[i][0].color_pipelines[fs_key]);
if (res != VK_SUCCESS)
return res;
}
if (res != VK_SUCCESS)
return res;
- res = create_depthstencil_pipeline(
- device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, false,
- &state->ds_clear[i].depthstencil_pipeline[j]);
+ res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j,
+ false, &state->ds_clear[i].depthstencil_pipeline[j]);
if (res != VK_SUCCESS)
return res;
if (res != VK_SUCCESS)
return res;
- res =
- create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, true,
- &state->ds_clear[i].stencil_only_unrestricted_pipeline[j]);
+ res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, true,
+ &state->ds_clear[i].stencil_only_unrestricted_pipeline[j]);
if (res != VK_SUCCESS)
return res;
- res = create_depthstencil_pipeline(
- device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, true,
- &state->ds_clear[i].depthstencil_unrestricted_pipeline[j]);
+ res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j,
+ true, &state->ds_clear[i].depthstencil_unrestricted_pipeline[j]);
if (res != VK_SUCCESS)
return res;
}
}
uint32_t
-radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
- const VkImageSubresourceRange *range, uint32_t value)
+radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
+ uint32_t value)
{
uint64_t offset = image->bindings[0].offset + image->planes[0].surface.cmask_offset;
uint64_t size;
size = slice_size * vk_image_subresource_layer_count(&image->vk, range);
}
- return radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
- radv_buffer_get_va(image->bindings[0].bo) + offset, size, value);
+ return radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, radv_buffer_get_va(image->bindings[0].bo) + offset,
+ size, value);
}
uint32_t
-radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
- const VkImageSubresourceRange *range, uint32_t value)
+radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
+ uint32_t value)
{
uint64_t offset = image->bindings[0].offset + image->planes[0].surface.fmask_offset;
unsigned slice_size = image->planes[0].surface.fmask_slice_size;
offset += slice_size * range->baseArrayLayer;
size = slice_size * vk_image_subresource_layer_count(&image->vk, range);
- return radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
- radv_buffer_get_va(image->bindings[0].bo) + offset, size, value);
+ return radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, radv_buffer_get_va(image->bindings[0].bo) + offset,
+ size, value);
}
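/* Sketch of the offset/size arithmetic shared by the CMASK and FMASK clears
 * above (a hypothetical helper, not RADV API): metadata for a layer range is
 * a contiguous run of slice_size blocks. */
#include <stdint.h>

static void meta_layer_range(uint64_t base_offset, uint64_t slice_size, uint32_t base_layer, uint32_t layer_count,
                             uint64_t *out_offset, uint64_t *out_size)
{
   *out_offset = base_offset + slice_size * base_layer;
   *out_size = slice_size * layer_count;
}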
uint32_t
-radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
- const VkImageSubresourceRange *range, uint32_t value)
+radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
+ uint32_t value)
{
uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
uint32_t layer_count = vk_image_subresource_layer_count(&image->vk, range);
assert(level == 0);
size = image->planes[0].surface.meta_size;
} else {
- const struct legacy_surf_dcc_level *dcc_level =
- &image->planes[0].surface.u.legacy.color.dcc_level[level];
+ const struct legacy_surf_dcc_level *dcc_level = &image->planes[0].surface.u.legacy.color.dcc_level[level];
/* If dcc_fast_clear_size is 0 (which might happen for
 * mipmaps) the fill buffer operation below is a no-op.
 * The fast clear path falls back to slow clears if one
 * level can't be fast cleared.
 */
- offset +=
- dcc_level->dcc_offset + dcc_level->dcc_slice_fast_clear_size * range->baseArrayLayer;
+ offset += dcc_level->dcc_offset + dcc_level->dcc_slice_fast_clear_size * range->baseArrayLayer;
size = dcc_level->dcc_slice_fast_clear_size * vk_image_subresource_layer_count(&image->vk, range);
}
continue;
flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
- radv_buffer_get_va(image->bindings[0].bo) + offset,
- size, value);
+ radv_buffer_get_va(image->bindings[0].bo) + offset, size, value);
}
return flush_bits;
}
static uint32_t
-radv_clear_dcc_comp_to_single(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range,
- uint32_t color_values[2])
+radv_clear_dcc_comp_to_single(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t color_values[2])
{
struct radv_device *device = cmd_buffer->device;
unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk.format);
unreachable("Unsupported number of bytes per pixel");
}
- radv_meta_save(
- &saved_state, cmd_buffer,
- RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS);
VkPipeline pipeline = device->meta_state.clear_dcc_comp_to_single_pipeline[is_msaa];
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
- pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
for (uint32_t l = 0; l < vk_image_subresource_level_count(&image->vk, range); l++) {
uint32_t width, height;
width = radv_minify(image->vk.extent.width, range->baseMipLevel + l);
height = radv_minify(image->vk.extent.height, range->baseMipLevel + l);
- radv_image_view_init(
- &iview, cmd_buffer->device,
- &(VkImageViewCreateInfo){
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = VK_IMAGE_VIEW_TYPE_2D,
- .format = format,
- .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = range->baseMipLevel + l,
- .levelCount = 1,
- .baseArrayLayer = range->baseArrayLayer,
- .layerCount = layer_count},
- },
- 0, &(struct radv_image_view_extra_create_info){.disable_compression = true});
-
- radv_meta_push_descriptor_set(
- cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.clear_dcc_comp_to_single_p_layout, 0,
- 1,
- (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo =
- (VkDescriptorImageInfo[]){
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }}});
-
- unsigned dcc_width =
- DIV_ROUND_UP(width, image->planes[0].surface.u.gfx9.color.dcc_block_width);
- unsigned dcc_height =
- DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height);
+ radv_image_view_init(&iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = format,
+ .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = range->baseMipLevel + l,
+ .levelCount = 1,
+ .baseArrayLayer = range->baseArrayLayer,
+ .layerCount = layer_count},
+ },
+ 0, &(struct radv_image_view_extra_create_info){.disable_compression = true});
+
+ radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.clear_dcc_comp_to_single_p_layout, 0, 1,
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(&iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
+
+ unsigned dcc_width = DIV_ROUND_UP(width, image->planes[0].surface.u.gfx9.color.dcc_block_width);
+ unsigned dcc_height = DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height);
const unsigned constants[4] = {
image->planes[0].surface.u.gfx9.color.dcc_block_width,
color_values[1],
};
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.clear_dcc_comp_to_single_p_layout,
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.clear_dcc_comp_to_single_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, constants);
radv_unaligned_dispatch(cmd_buffer, dcc_width, dcc_height, layer_count);
radv_meta_restore(&saved_state, cmd_buffer);
- return RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
+ return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
}
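/* Sketch of the dispatch-size arithmetic above: one invocation per DCC block,
 * so each level's dimensions are divided by the block size with round-up.
 * E.g. a 100x60 level with 8x8 blocks dispatches 13x8 blocks. The macro name
 * is illustrative, to avoid clashing with the driver's own DIV_ROUND_UP. */
#include <stdint.h>

#define DIV_ROUND_UP_ILLUSTRATIVE(n, d) (((n) + (d) - 1) / (d))

static void dcc_dispatch_dims(uint32_t width, uint32_t height, uint32_t block_w, uint32_t block_h,
                              uint32_t *dcc_width, uint32_t *dcc_height)
{
   *dcc_width = DIV_ROUND_UP_ILLUSTRATIVE(width, block_w);   /* 100 / 8 -> 13 */
   *dcc_height = DIV_ROUND_UP_ILLUSTRATIVE(height, block_h); /*  60 / 8 ->  8 */
}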
uint32_t
if (htile_mask == UINT_MAX) {
/* Clear the whole HTILE buffer. */
flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
- radv_buffer_get_va(image->bindings[0].bo) + offset,
- size, value);
+ radv_buffer_get_va(image->bindings[0].bo) + offset, size, value);
} else {
/* Only clear depth or stencil bytes in the HTILE buffer. */
- flush_bits |=
- clear_htile_mask(cmd_buffer, image, image->bindings[0].bo, offset, size, value, htile_mask);
+ flush_bits |= clear_htile_mask(cmd_buffer, image, image->bindings[0].bo, offset, size, value, htile_mask);
}
}
} else {
if (htile_mask == UINT_MAX) {
/* Clear the whole HTILE buffer. */
flush_bits = radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
- radv_buffer_get_va(image->bindings[0].bo) + offset,
- size, value);
+ radv_buffer_get_va(image->bindings[0].bo) + offset, size, value);
} else {
/* Only clear depth or stencil bytes in the HTILE buffer. */
- flush_bits =
- clear_htile_mask(cmd_buffer, image, image->bindings[0].bo, offset, size, value, htile_mask);
+ flush_bits = clear_htile_mask(cmd_buffer, image, image->bindings[0].bo, offset, size, value, htile_mask);
}
}
}
const struct util_format_description *desc = vk_format_description(iview->vk.format);
- if (iview->vk.format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
- iview->vk.format == VK_FORMAT_R5G6B5_UNORM_PACK16 || iview->vk.format == VK_FORMAT_B5G6R5_UNORM_PACK16)
+ if (iview->vk.format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 || iview->vk.format == VK_FORMAT_R5G6B5_UNORM_PACK16 ||
+ iview->vk.format == VK_FORMAT_B5G6R5_UNORM_PACK16)
extra_channel = -1;
else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
if (vi_alpha_is_on_msb(device, iview->vk.format))
values[i] = clear_value->int32[i] != 0;
if (clear_value->int32[i] != 0 && MIN2(clear_value->int32[i], max) != max)
return;
- } else if (desc->channel[i].pure_integer &&
- desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+ } else if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
/* Use the maximum value for clamping the clear color. */
unsigned max = u_bit_consecutive(0, desc->channel[i].size);
return true;
}
} else if (desc->nr_channels == 4 && desc->channel[0].size == 8) {
- if (value.ub[0] == 0x00 && value.ub[1] == 0x00 &&
- value.ub[2] == 0x00 && value.ub[3] == 0xff) {
+ if (value.ub[0] == 0x00 && value.ub[1] == 0x00 && value.ub[2] == 0x00 && value.ub[3] == 0xff) {
*reset_value = RADV_DCC_GFX11_CLEAR_0001_UNORM;
return true;
- } else if (value.ub[0] == 0xff && value.ub[1] == 0xff &&
- value.ub[2] == 0xff && value.ub[3] == 0x00) {
+ } else if (value.ub[0] == 0xff && value.ub[1] == 0xff && value.ub[2] == 0xff && value.ub[3] == 0x00) {
*reset_value = RADV_DCC_GFX11_CLEAR_1110_UNORM;
return true;
}
} else if (desc->nr_channels == 4 && desc->channel[0].size == 16) {
- if (value.us[0] == 0x0000 && value.us[1] == 0x0000 &&
- value.us[2] == 0x0000 && value.us[3] == 0xffff) {
+ if (value.us[0] == 0x0000 && value.us[1] == 0x0000 && value.us[2] == 0x0000 && value.us[3] == 0xffff) {
*reset_value = RADV_DCC_GFX11_CLEAR_0001_UNORM;
return true;
- } else if (value.us[0] == 0xffff && value.us[1] == 0xffff &&
- value.us[2] == 0xffff && value.us[3] == 0x0000) {
+ } else if (value.us[0] == 0xffff && value.us[1] == 0xffff && value.us[2] == 0xffff && value.us[3] == 0x0000) {
*reset_value = RADV_DCC_GFX11_CLEAR_1110_UNORM;
return true;
}
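/* Summary of the two comp-to-single patterns recognized above (an assumption
 * based on the visible UNORM cases): RGBA == (0, 0, 0, 1) selects
 * RADV_DCC_GFX11_CLEAR_0001_UNORM and RGBA == (1, 1, 1, 0) selects
 * RADV_DCC_GFX11_CLEAR_1110_UNORM; any other color falls through to the
 * remaining checks (elided here). */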
static bool
radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
- VkImageLayout image_layout,
- const VkClearRect *clear_rect, VkClearColorValue clear_value,
+ VkImageLayout image_layout, const VkClearRect *clear_rect, VkClearColorValue clear_value,
uint32_t view_mask)
{
uint32_t clear_color[2];
if (!iview || !iview->support_fast_clear)
return false;
- if (!radv_layout_can_fast_clear(
- cmd_buffer->device, iview->image, iview->vk.base_mip_level, image_layout,
- radv_image_queue_family_mask(iview->image, cmd_buffer->qf,
- cmd_buffer->qf)))
+ if (!radv_layout_can_fast_clear(cmd_buffer->device, iview->image, iview->vk.base_mip_level, image_layout,
+ radv_image_queue_family_mask(iview->image, cmd_buffer->qf, cmd_buffer->qf)))
return false;
if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
clear_rect->rect.extent.height != iview->image->vk.extent.height)
return false;
- if (view_mask && (iview->image->vk.array_layers >= 32 ||
- (1u << iview->image->vk.array_layers) - 1u != view_mask))
+ if (view_mask && (iview->image->vk.array_layers >= 32 || (1u << iview->image->vk.array_layers) - 1u != view_mask))
return false;
if (!view_mask && clear_rect->baseArrayLayer != 0)
return false;
return false;
/* Images that support comp-to-single clears don't have clear values. */
- if (!iview->image->support_comp_to_single &&
- !radv_image_has_clear_value(iview->image) && (clear_color[0] != 0 || clear_color[1] != 0))
+ if (!iview->image->support_comp_to_single && !radv_image_has_clear_value(iview->image) &&
+ (clear_color[0] != 0 || clear_color[1] != 0))
return false;
if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level)) {
uint32_t reset_value;
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
- if (!gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value,
- &reset_value))
+ if (!gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value))
return false;
} else {
gfx8_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value,
bool can_avoid_fast_clear_elim = true;
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
- ASSERTED bool result =
- gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value);
+ ASSERTED bool result = gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value);
assert(result);
} else {
gfx8_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value,
 * The parameters mean the same as those in vkCmdClearAttachments.
*/
static void
-emit_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,
- const VkClearRect *clear_rect, enum radv_cmd_flush_bits *pre_flush,
- enum radv_cmd_flush_bits *post_flush, uint32_t view_mask)
+emit_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, const VkClearRect *clear_rect,
+ enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush, uint32_t view_mask)
{
const struct radv_rendering_state *render = &cmd_buffer->state.render;
VkImageAspectFlags aspects = clear_att->aspectMask;
if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
assert(clear_att->colorAttachment < render->color_att_count);
- const struct radv_attachment *color_att =
- &render->color_att[clear_att->colorAttachment];
+ const struct radv_attachment *color_att = &render->color_att[clear_att->colorAttachment];
if (color_att->format == VK_FORMAT_UNDEFINED)
return;
VkClearColorValue clear_value = clear_att->clearValue.color;
- if (radv_can_fast_clear_color(cmd_buffer, color_att->iview, color_att->layout, clear_rect,
- clear_value, view_mask)) {
+ if (radv_can_fast_clear_color(cmd_buffer, color_att->iview, color_att->layout, clear_rect, clear_value,
+ view_mask)) {
radv_fast_clear_color(cmd_buffer, color_att->iview, clear_att, pre_flush, post_flush);
} else {
emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask);
assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));
- if (radv_can_fast_clear_depth(cmd_buffer, ds_att->iview, ds_att->layout, aspects,
- clear_rect, clear_value, view_mask)) {
+ if (radv_can_fast_clear_depth(cmd_buffer, ds_att->iview, ds_att->layout, aspects, clear_rect, clear_value,
+ view_mask)) {
radv_fast_clear_depth(cmd_buffer, ds_att->iview, clear_att, pre_flush, post_flush);
} else {
emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect, view_mask);
return true;
}
- if (pRenderingInfo->pDepthAttachment != NULL &&
- pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE &&
+ if (pRenderingInfo->pDepthAttachment != NULL && pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE &&
pRenderingInfo->pDepthAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)
return true;
- if (pRenderingInfo->pStencilAttachment != NULL &&
- pRenderingInfo->pStencilAttachment->imageView != VK_NULL_HANDLE &&
+ if (pRenderingInfo->pStencilAttachment != NULL && pRenderingInfo->pStencilAttachment->imageView != VK_NULL_HANDLE &&
pRenderingInfo->pStencilAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)
return true;
}
static void
-radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer,
- const VkClearAttachment *clear_att,
- enum radv_cmd_flush_bits *pre_flush,
- enum radv_cmd_flush_bits *post_flush)
+radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,
+ enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush)
{
const struct radv_rendering_state *render = &cmd_buffer->state.render;
* @see radv_attachment_state::pending_clear_aspects
*/
void
-radv_cmd_buffer_clear_rendering(struct radv_cmd_buffer *cmd_buffer,
- const VkRenderingInfo *pRenderingInfo)
+radv_cmd_buffer_clear_rendering(struct radv_cmd_buffer *cmd_buffer, const VkRenderingInfo *pRenderingInfo)
{
const struct radv_rendering_state *render = &cmd_buffer->state.render;
struct radv_meta_saved_state saved_state;
return;
/* Subpass clear should not be affected by conditional rendering. */
- radv_meta_save(
- &saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING);
assert(render->color_att_count == pRenderingInfo->colorAttachmentCount);
for (uint32_t i = 0; i < render->color_att_count; i++) {
}
if (render->ds_att.iview != NULL) {
- VkClearAttachment clear_att = { .aspectMask = 0 };
+ VkClearAttachment clear_att = {.aspectMask = 0};
- if (pRenderingInfo->pDepthAttachment != NULL &&
- pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE &&
+ if (pRenderingInfo->pDepthAttachment != NULL && pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE &&
pRenderingInfo->pDepthAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
clear_att.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
- clear_att.clearValue.depthStencil.depth =
- pRenderingInfo->pDepthAttachment->clearValue.depthStencil.depth;
+ clear_att.clearValue.depthStencil.depth = pRenderingInfo->pDepthAttachment->clearValue.depthStencil.depth;
}
if (pRenderingInfo->pStencilAttachment != NULL &&
}
static void
-radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
- VkImageLayout image_layout, const VkImageSubresourceRange *range,
- VkFormat format, int level, unsigned layer_count,
+radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout image_layout,
+ const VkImageSubresourceRange *range, VkFormat format, int level, unsigned layer_count,
const VkClearValue *clear_val)
{
struct radv_image_view iview;
VkRenderingInfo rendering_info = {
.sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
- .renderArea = {
- .offset = { 0, 0 },
- .extent = { width, height },
- },
+ .renderArea =
+ {
+ .offset = {0, 0},
+ .extent = {width, height},
+ },
.layerCount = layer_count,
};
*/
static bool
radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkFormat format,
- VkImageLayout image_layout,
- const VkImageSubresourceRange *range, const VkClearValue *clear_val)
+ VkImageLayout image_layout, const VkImageSubresourceRange *range, const VkClearValue *clear_val)
{
struct radv_image_view iview;
bool fast_cleared = false;
};
if (vk_format_is_color(format)) {
- if (radv_can_fast_clear_color(cmd_buffer, &iview, image_layout, &clear_rect,
- clear_att.clearValue.color, 0)) {
+ if (radv_can_fast_clear_color(cmd_buffer, &iview, image_layout, &clear_rect, clear_att.clearValue.color, 0)) {
radv_fast_clear_color(cmd_buffer, &iview, &clear_att, NULL, NULL);
fast_cleared = true;
}
} else {
- if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout,
- range->aspectMask, &clear_rect,
+ if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout, range->aspectMask, &clear_rect,
clear_att.clearValue.depthStencil, 0)) {
radv_fast_clear_depth(cmd_buffer, &iview, &clear_att, NULL, NULL);
fast_cleared = true;
}
static void
-radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
- VkImageLayout image_layout, const VkClearValue *clear_value,
- uint32_t range_count, const VkImageSubresourceRange *ranges, bool cs)
+radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout image_layout,
+ const VkClearValue *clear_value, uint32_t range_count, const VkImageSubresourceRange *ranges,
+ bool cs)
{
VkFormat format = image->vk.format;
VkClearValue internal_clear_value;
if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
bool blendable;
if (cs ? !radv_is_storage_image_format_supported(cmd_buffer->device->physical_device, format)
- : !radv_is_colorbuffer_format_supported(cmd_buffer->device->physical_device, format,
- &blendable)) {
+ : !radv_is_colorbuffer_format_supported(cmd_buffer->device->physical_device, format, &blendable)) {
format = VK_FORMAT_R32_UINT;
internal_clear_value.color.uint32[0] = float3_to_rgb9e5(clear_value->color.float32);
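/* float3_to_rgb9e5() packs the three floats into one 32-bit word (three
 * 9-bit mantissas sharing a 5-bit exponent), so the clear value can be
 * written through the R32_UINT view selected above. */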
- uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf,
- cmd_buffer->qf);
+ uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf);
for (uint32_t r = 0; r < range_count; r++) {
const VkImageSubresourceRange *range = &ranges[r];
/* Don't use compressed image stores because they will use an incompatible format. */
- if (radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel,
- image_layout, queue_mask)) {
+ if (radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel, image_layout, queue_mask)) {
disable_compression = cs;
break;
}
/* Try to perform a fast clear first, otherwise fallback to
* the legacy path.
*/
- if (!cs && radv_fast_clear_range(cmd_buffer, image, format, image_layout, range,
- &internal_clear_value)) {
+ if (!cs && radv_fast_clear_range(cmd_buffer, image, format, image_layout, range, &internal_clear_value)) {
continue;
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
- const VkClearColorValue *pColor, uint32_t rangeCount,
- const VkImageSubresourceRange *pRanges)
+ const VkClearColorValue *pColor, uint32_t rangeCount, const VkImageSubresourceRange *pRanges)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_image, image, image_h);
struct radv_meta_saved_state saved_state;
bool cs;
- cs = cmd_buffer->qf == RADV_QUEUE_COMPUTE ||
- !radv_image_is_renderable(cmd_buffer->device, image);
+ cs = cmd_buffer->qf == RADV_QUEUE_COMPUTE || !radv_image_is_renderable(cmd_buffer->device, image);
/* Clear commands (except vkCmdClearAttachments) should not be affected by conditional rendering.
*/
radv_meta_save(&saved_state, cmd_buffer, save_flags);
- radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pColor, rangeCount,
- pRanges, cs);
+ radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pColor, rangeCount, pRanges, cs);
radv_meta_restore(&saved_state, cmd_buffer);
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h,
- VkImageLayout imageLayout,
+radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount,
const VkImageSubresourceRange *pRanges)
{
struct radv_meta_saved_state saved_state;
/* Clear commands (except vkCmdClearAttachments) should not be affected by conditional rendering. */
- radv_meta_save(
- &saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING);
- radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pDepthStencil,
- rangeCount, pRanges, false);
+ radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pDepthStencil, rangeCount, pRanges,
+ false);
radv_meta_restore(&saved_state, cmd_buffer);
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount,
- const VkClearAttachment *pAttachments, uint32_t rectCount,
- const VkClearRect *pRects)
+radv_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment *pAttachments,
+ uint32_t rectCount, const VkClearRect *pRects)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_meta_saved_state saved_state;
if (!cmd_buffer->state.render.active)
return;
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS);
+ radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS);
/* FINISHME: We can do better than this dumb loop. It thrashes too much
* state.
}
static struct radv_meta_blit2d_surf
-blit_surf_for_image_level_layer(struct radv_image *image, VkImageLayout layout,
- const VkImageSubresourceLayers *subres,
+blit_surf_for_image_level_layer(struct radv_image *image, VkImageLayout layout, const VkImageSubresourceLayers *subres,
VkImageAspectFlags aspect_mask)
{
VkFormat format = radv_get_aspect_format(image, aspect_mask);
bool
radv_image_is_renderable(const struct radv_device *device, const struct radv_image *image)
{
- if (image->vk.format == VK_FORMAT_R32G32B32_UINT ||
- image->vk.format == VK_FORMAT_R32G32B32_SINT ||
+ if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT ||
image->vk.format == VK_FORMAT_R32G32B32_SFLOAT)
return false;
- if (device->physical_device->rad_info.gfx_level >= GFX9 &&
- image->vk.image_type == VK_IMAGE_TYPE_3D &&
- vk_format_get_blocksizebits(image->vk.format) == 128 &&
- vk_format_is_compressed(image->vk.format))
+ if (device->physical_device->rad_info.gfx_level >= GFX9 && image->vk.image_type == VK_IMAGE_TYPE_3D &&
+ vk_format_get_blocksizebits(image->vk.format) == 128 && vk_format_is_compressed(image->vk.format))
return false;
if (image->planes[0].surface.flags & RADEON_SURF_NO_RENDER_TARGET)
}
static void
-copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer,
- struct radv_image *image, VkImageLayout layout,
- const VkBufferImageCopy2 *region)
+copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, struct radv_image *image,
+ VkImageLayout layout, const VkBufferImageCopy2 *region)
{
struct radv_meta_saved_state saved_state;
bool cs;
*/
assert(image->vk.samples == 1);
- cs = cmd_buffer->qf == RADV_QUEUE_COMPUTE ||
- !radv_image_is_renderable(cmd_buffer->device, image);
+ cs = cmd_buffer->qf == RADV_QUEUE_COMPUTE || !radv_image_is_renderable(cmd_buffer->device, image);
/* VK_EXT_conditional_rendering says that copy commands should not be
* affected by conditional rendering.
*/
radv_meta_save(&saved_state, cmd_buffer,
- (cs ? RADV_META_SAVE_COMPUTE_PIPELINE : RADV_META_SAVE_GRAPHICS_PIPELINE) |
- RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS |
- RADV_META_SUSPEND_PREDICATING);
+ (cs ? RADV_META_SAVE_COMPUTE_PIPELINE : RADV_META_SAVE_GRAPHICS_PIPELINE) | RADV_META_SAVE_CONSTANTS |
+ RADV_META_SAVE_DESCRIPTORS | RADV_META_SUSPEND_PREDICATING);
/**
* From the Vulkan 1.0.6 spec: 18.3 Copying Data Between Images
};
/* Create blit surfaces */
- struct radv_meta_blit2d_surf img_bsurf = blit_surf_for_image_level_layer(
- image, layout, &region->imageSubresource, region->imageSubresource.aspectMask);
+ struct radv_meta_blit2d_surf img_bsurf =
+ blit_surf_for_image_level_layer(image, layout, &region->imageSubresource, region->imageSubresource.aspectMask);
if (!radv_is_buffer_format_supported(img_bsurf.format, NULL)) {
- uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf,
- cmd_buffer->qf);
+ uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf);
bool compressed =
- radv_layout_dcc_compressed(cmd_buffer->device, image, region->imageSubresource.mipLevel,
- layout, queue_mask);
+ radv_layout_dcc_compressed(cmd_buffer->device, image, region->imageSubresource.mipLevel, layout, queue_mask);
if (compressed) {
radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_UNKNOWN_REASON);
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
- const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
+radv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
RADV_FROM_HANDLE(radv_image, dst_image, pCopyBufferToImageInfo->dstImage);
for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
- copy_buffer_to_image(cmd_buffer, src_buffer, dst_image,
- pCopyBufferToImageInfo->dstImageLayout,
+ copy_buffer_to_image(cmd_buffer, src_buffer, dst_image, pCopyBufferToImageInfo->dstImageLayout,
&pCopyBufferToImageInfo->pRegions[r]);
}
cmd_buffer->state.flush_bits |=
RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, dst_image) |
- radv_dst_access_flush(
- cmd_buffer, VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, dst_image);
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, dst_image);
for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
radv_meta_decode_etc(cmd_buffer, dst_image, pCopyBufferToImageInfo->dstImageLayout,
&pCopyBufferToImageInfo->pRegions[r].imageSubresource,
}
static void
-copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer,
- struct radv_image *image, VkImageLayout layout,
- const VkBufferImageCopy2 *region)
+copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, struct radv_image *image,
+ VkImageLayout layout, const VkBufferImageCopy2 *region)
{
struct radv_device *device = cmd_buffer->device;
if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
* affected by conditional rendering.
*/
radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS | RADV_META_SUSPEND_PREDICATING);
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS |
+ RADV_META_SUSPEND_PREDICATING);
/**
* From the Vulkan 1.0.6 spec: 18.3 Copying Data Between Images
};
/* Create blit surfaces */
- struct radv_meta_blit2d_surf img_info = blit_surf_for_image_level_layer(
- image, layout, &region->imageSubresource, region->imageSubresource.aspectMask);
+ struct radv_meta_blit2d_surf img_info =
+ blit_surf_for_image_level_layer(image, layout, &region->imageSubresource, region->imageSubresource.aspectMask);
if (!radv_is_buffer_format_supported(img_info.format, NULL)) {
- uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf,
- cmd_buffer->qf);
- bool compressed = radv_layout_dcc_compressed(device, image, region->imageSubresource.mipLevel,
- layout, queue_mask);
+ uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf);
+ bool compressed =
+ radv_layout_dcc_compressed(device, image, region->imageSubresource.mipLevel, layout, queue_mask);
if (compressed) {
radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_UNKNOWN_REASON);
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
- const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
+radv_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_image, src_image, pCopyImageToBufferInfo->srcImage);
RADV_FROM_HANDLE(radv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);
for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
- copy_image_to_buffer(cmd_buffer, dst_buffer, src_image,
- pCopyImageToBufferInfo->srcImageLayout,
+ copy_image_to_buffer(cmd_buffer, dst_buffer, src_image, pCopyImageToBufferInfo->srcImageLayout,
&pCopyImageToBufferInfo->pRegions[r]);
}
}
static void
-copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
- VkImageLayout src_image_layout, struct radv_image *dst_image,
- VkImageLayout dst_image_layout, const VkImageCopy2 *region)
+copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkImageLayout src_image_layout,
+ struct radv_image *dst_image, VkImageLayout dst_image_layout, const VkImageCopy2 *region)
{
struct radv_meta_saved_state saved_state;
bool cs;
*/
assert(src_image->vk.samples == dst_image->vk.samples);
- cs = cmd_buffer->qf == RADV_QUEUE_COMPUTE ||
- !radv_image_is_renderable(cmd_buffer->device, dst_image);
+ cs = cmd_buffer->qf == RADV_QUEUE_COMPUTE || !radv_image_is_renderable(cmd_buffer->device, dst_image);
/* VK_EXT_conditional_rendering says that copy commands should not be
* affected by conditional rendering.
*/
radv_meta_save(&saved_state, cmd_buffer,
- (cs ? RADV_META_SAVE_COMPUTE_PIPELINE : RADV_META_SAVE_GRAPHICS_PIPELINE) |
- RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS |
- RADV_META_SUSPEND_PREDICATING);
+ (cs ? RADV_META_SAVE_COMPUTE_PIPELINE : RADV_META_SAVE_GRAPHICS_PIPELINE) | RADV_META_SAVE_CONSTANTS |
+ RADV_META_SAVE_DESCRIPTORS | RADV_META_SUSPEND_PREDICATING);
if (cs) {
/* For partial copies, HTILE should be decompressed before copying because the metadata is
* re-initialized to the uncompressed state after.
*/
- uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf,
- cmd_buffer->qf);
+ uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf);
- if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, dst_image_layout,
- queue_mask) &&
+ if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, dst_image_layout, queue_mask) &&
(region->dstOffset.x || region->dstOffset.y || region->dstOffset.z ||
- region->extent.width != dst_image->vk.extent.width ||
- region->extent.height != dst_image->vk.extent.height ||
+ region->extent.width != dst_image->vk.extent.width || region->extent.height != dst_image->vk.extent.height ||
region->extent.depth != dst_image->vk.extent.depth)) {
radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_UNKNOWN_REASON);
}
}
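/* Worked example of the partial-copy check above: copying a 64x64 region into
 * a 128x128 HTILE-compressed depth image would leave stale metadata for the
 * untouched texels, so the destination is decompressed first; a copy covering
 * the full extent at offset (0, 0, 0) skips the decompression. */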
- VkImageAspectFlags src_aspects[3] = { region->srcSubresource.aspectMask };
- VkImageAspectFlags dst_aspects[3] = { region->dstSubresource.aspectMask };
+ VkImageAspectFlags src_aspects[3] = {region->srcSubresource.aspectMask};
+ VkImageAspectFlags dst_aspects[3] = {region->dstSubresource.aspectMask};
unsigned aspect_count = 1;
- if (region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT &&
- src_image->plane_count > 1) {
- static const VkImageAspectFlags all_planes[3] = {
- VK_IMAGE_ASPECT_PLANE_0_BIT,
- VK_IMAGE_ASPECT_PLANE_1_BIT,
- VK_IMAGE_ASPECT_PLANE_2_BIT
- };
+ if (region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT && src_image->plane_count > 1) {
+ static const VkImageAspectFlags all_planes[3] = {VK_IMAGE_ASPECT_PLANE_0_BIT, VK_IMAGE_ASPECT_PLANE_1_BIT,
+ VK_IMAGE_ASPECT_PLANE_2_BIT};
aspect_count = src_image->plane_count;
for (unsigned i = 0; i < aspect_count; i++) {
for (unsigned a = 0; a < aspect_count; ++a) {
/* Create blit surfaces */
- struct radv_meta_blit2d_surf b_src = blit_surf_for_image_level_layer(
- src_image, src_image_layout, &region->srcSubresource, src_aspects[a]);
+ struct radv_meta_blit2d_surf b_src =
+ blit_surf_for_image_level_layer(src_image, src_image_layout, &region->srcSubresource, src_aspects[a]);
- struct radv_meta_blit2d_surf b_dst = blit_surf_for_image_level_layer(
- dst_image, dst_image_layout, &region->dstSubresource, dst_aspects[a]);
+ struct radv_meta_blit2d_surf b_dst =
+ blit_surf_for_image_level_layer(dst_image, dst_image_layout, &region->dstSubresource, dst_aspects[a]);
- uint32_t dst_queue_mask = radv_image_queue_family_mask(
- dst_image, cmd_buffer->qf, cmd_buffer->qf);
- bool dst_compressed = radv_layout_dcc_compressed(cmd_buffer->device, dst_image,
- region->dstSubresource.mipLevel,
+ uint32_t dst_queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf);
+ bool dst_compressed = radv_layout_dcc_compressed(cmd_buffer->device, dst_image, region->dstSubresource.mipLevel,
dst_image_layout, dst_queue_mask);
- uint32_t src_queue_mask = radv_image_queue_family_mask(
- src_image, cmd_buffer->qf, cmd_buffer->qf);
- bool src_compressed = radv_layout_dcc_compressed(cmd_buffer->device, src_image,
- region->srcSubresource.mipLevel,
+ uint32_t src_queue_mask = radv_image_queue_family_mask(src_image, cmd_buffer->qf, cmd_buffer->qf);
+ bool src_compressed = radv_layout_dcc_compressed(cmd_buffer->device, src_image, region->srcSubresource.mipLevel,
src_image_layout, src_queue_mask);
bool need_dcc_sign_reinterpret = false;
- if (!src_compressed ||
- (radv_dcc_formats_compatible(cmd_buffer->device->physical_device->rad_info.gfx_level,
- b_src.format, b_dst.format, &need_dcc_sign_reinterpret) &&
- !need_dcc_sign_reinterpret)) {
+ if (!src_compressed || (radv_dcc_formats_compatible(cmd_buffer->device->physical_device->rad_info.gfx_level,
+ b_src.format, b_dst.format, &need_dcc_sign_reinterpret) &&
+ !need_dcc_sign_reinterpret)) {
b_src.format = b_dst.format;
} else if (!dst_compressed) {
b_dst.format = b_src.format;
* Also, convert the offsets and extent from units of texels to units of
* blocks - which is the highest resolution accessible in this command.
*/
- const VkOffset3D dst_offset_el =
- vk_image_offset_to_elements(&dst_image->vk, region->dstOffset);
- const VkOffset3D src_offset_el =
- vk_image_offset_to_elements(&src_image->vk, region->srcOffset);
+ const VkOffset3D dst_offset_el = vk_image_offset_to_elements(&dst_image->vk, region->dstOffset);
+ const VkOffset3D src_offset_el = vk_image_offset_to_elements(&src_image->vk, region->srcOffset);
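/* Worked example, assuming a BC1-compressed source (4x4 texel blocks):
 * vk_image_offset_to_elements() maps a texel offset of (8, 4, 0) to the
 * element offset (2, 1, 0), and a 16x8 texel extent becomes 4x2 elements,
 * the finest granularity this copy can address. */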
/*
* From Vulkan 1.0.68, "Copying Data Between Images":
if (cs) {
/* Fixup HTILE after a copy on compute. */
- uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf,
- cmd_buffer->qf);
+ uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf);
- if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, dst_image_layout,
- queue_mask)) {
+ if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, dst_image_layout, queue_mask)) {
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE;
VkImageSubresourceRange range = {
RADV_FROM_HANDLE(radv_image, dst_image, pCopyImageInfo->dstImage);
for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
- copy_image(cmd_buffer, src_image, pCopyImageInfo->srcImageLayout, dst_image,
- pCopyImageInfo->dstImageLayout, &pCopyImageInfo->pRegions[r]);
+ copy_image(cmd_buffer, src_image, pCopyImageInfo->srcImageLayout, dst_image, pCopyImageInfo->dstImageLayout,
+ &pCopyImageInfo->pRegions[r]);
}
if (cmd_buffer->device->physical_device->emulate_etc2 &&
cmd_buffer->state.flush_bits |=
RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, dst_image) |
- radv_dst_access_flush(
- cmd_buffer, VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, dst_image);
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, dst_image);
for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
radv_meta_decode_etc(cmd_buffer, dst_image, pCopyImageInfo->dstImageLayout,
- &pCopyImageInfo->pRegions[r].dstSubresource,
- pCopyImageInfo->pRegions[r].dstOffset,
+ &pCopyImageInfo->pRegions[r].dstSubresource, pCopyImageInfo->pRegions[r].dstOffset,
pCopyImageInfo->pRegions[r].extent);
}
}
{
struct radv_meta_state *state = &device->meta_state;
- radv_DestroyPipeline(radv_device_to_handle(device), state->copy_vrs_htile_pipeline,
- &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->copy_vrs_htile_p_layout,
- &state->alloc);
- device->vk.dispatch_table.DestroyDescriptorSetLayout(
- radv_device_to_handle(device), state->copy_vrs_htile_ds_layout, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->copy_vrs_htile_pipeline, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->copy_vrs_htile_p_layout, &state->alloc);
+ device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->copy_vrs_htile_ds_layout,
+ &state->alloc);
}
static nir_shader *
/* Get the HTILE addr from coordinates. */
nir_ssa_def *zero = nir_imm_int(&b, 0);
- nir_ssa_def *htile_addr = ac_nir_htile_addr_from_coord(
- &b, &device->physical_device->rad_info, &surf->u.gfx9.zs.htile_equation, htile_pitch,
- htile_slice_size, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero);
+ nir_ssa_def *htile_addr =
+ ac_nir_htile_addr_from_coord(&b, &device->physical_device->rad_info, &surf->u.gfx9.zs.htile_equation, htile_pitch,
+ htile_slice_size, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero);
/* Set up the input VRS image descriptor. */
- const struct glsl_type *vrs_sampler_type =
- glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT);
- nir_variable *input_vrs_img =
- nir_variable_create(b.shader, nir_var_uniform, vrs_sampler_type, "input_vrs_image");
+ const struct glsl_type *vrs_sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT);
+ nir_variable *input_vrs_img = nir_variable_create(b.shader, nir_var_uniform, vrs_sampler_type, "input_vrs_image");
input_vrs_img->data.descriptor_set = 0;
input_vrs_img->data.binding = 0;
}
static VkResult
-radv_device_init_meta_copy_vrs_htile_state(struct radv_device *device,
- struct radeon_surf *surf)
+radv_device_init_meta_copy_vrs_htile_state(struct radv_device *device, struct radeon_surf *surf)
{
struct radv_meta_state *state = &device->meta_state;
nir_shader *cs = build_copy_vrs_htile_shader(device, surf);
VkResult result;
- VkDescriptorSetLayoutCreateInfo ds_layout_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]){
- {.binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- {.binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- }};
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info,
- &state->alloc, &state->copy_vrs_htile_ds_layout);
+ VkDescriptorSetLayoutCreateInfo ds_layout_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info, &state->alloc,
+ &state->copy_vrs_htile_ds_layout);
if (result != VK_SUCCESS)
goto fail;
.layout = state->copy_vrs_htile_p_layout,
};
- result =
- radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, &pipeline_info,
- NULL, &state->copy_vrs_htile_pipeline);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, &pipeline_info, NULL,
+ &state->copy_vrs_htile_pipeline);
fail:
ralloc_free(cs);
return result;
}
void
-radv_copy_vrs_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *vrs_image,
- const VkRect2D *rect, struct radv_image *dst_image,
- struct radv_buffer *htile_buffer, bool read_htile_value)
+radv_copy_vrs_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *vrs_image, const VkRect2D *rect,
+ struct radv_image *dst_image, struct radv_buffer *htile_buffer, bool read_htile_value)
{
struct radv_device *device = cmd_buffer->device;
struct radv_meta_state *state = &device->meta_state;
assert(radv_image_has_htile(dst_image));
if (!cmd_buffer->device->meta_state.copy_vrs_htile_pipeline) {
- VkResult ret = radv_device_init_meta_copy_vrs_htile_state(cmd_buffer->device,
- &dst_image->planes[0].surface);
+ VkResult ret = radv_device_init_meta_copy_vrs_htile_state(cmd_buffer->device, &dst_image->planes[0].surface);
if (ret != VK_SUCCESS) {
vk_command_buffer_set_error(&cmd_buffer->vk, ret);
return;
radv_src_access_flush(cmd_buffer, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL) |
radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT, NULL);
- radv_meta_save(
- &saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
state->copy_vrs_htile_pipeline);
radv_meta_push_descriptor_set(
cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->copy_vrs_htile_p_layout, 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]){
- {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo =
- (VkDescriptorImageInfo[]){
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&vrs_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }},
- {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(htile_buffer),
- .offset = 0,
- .range = htile_buffer->vk.size}}});
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(&vrs_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(htile_buffer),
+ .offset = 0,
+ .range = htile_buffer->vk.size}}});
const unsigned constants[5] = {
- rect->offset.x, rect->offset.y,
- dst_image->planes[0].surface.meta_pitch, dst_image->planes[0].surface.meta_slice_size,
+ rect->offset.x,
+ rect->offset.y,
+ dst_image->planes[0].surface.meta_pitch,
+ dst_image->planes[0].surface.meta_slice_size,
read_htile_value,
};
radv_meta_restore(&saved_state, cmd_buffer);
- cmd_buffer->state.flush_bits |=
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
}
nir_ssa_def *coord = get_global_ids(&b, 2);
nir_ssa_def *zero = nir_imm_int(&b, 0);
- coord = nir_imul(
- &b, coord,
- nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width, surf->u.gfx9.color.dcc_block_height));
-
- nir_ssa_def *src = ac_nir_dcc_addr_from_coord(&b, &dev->physical_device->rad_info, surf->bpe,
- &surf->u.gfx9.color.dcc_equation, src_dcc_pitch,
- src_dcc_height, zero, nir_channel(&b, coord, 0),
- nir_channel(&b, coord, 1), zero, zero, zero);
+ coord =
+ nir_imul(&b, coord, nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width, surf->u.gfx9.color.dcc_block_height));
+
+ nir_ssa_def *src = ac_nir_dcc_addr_from_coord(
+ &b, &dev->physical_device->rad_info, surf->bpe, &surf->u.gfx9.color.dcc_equation, src_dcc_pitch, src_dcc_height,
+ zero, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero, zero);
nir_ssa_def *dst = ac_nir_dcc_addr_from_coord(
- &b, &dev->physical_device->rad_info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation,
- dst_dcc_pitch, dst_dcc_height, zero, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1),
- zero, zero, zero);
+ &b, &dev->physical_device->rad_info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation, dst_dcc_pitch,
+ dst_dcc_height, zero, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero, zero);
- nir_ssa_def *dcc_val = nir_image_deref_load(&b, 1, 32, input_dcc_ref,
- nir_vec4(&b, src, src, src, src),
- nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0),
- .image_dim = dim);
+ nir_ssa_def *dcc_val = nir_image_deref_load(&b, 1, 32, input_dcc_ref, nir_vec4(&b, src, src, src, src),
+ nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = dim);
- nir_image_deref_store(&b, output_dcc_ref, nir_vec4(&b, dst, dst, dst, dst),
- nir_ssa_undef(&b, 1, 32), dcc_val, nir_imm_int(&b, 0), .image_dim = dim);
+ nir_image_deref_store(&b, output_dcc_ref, nir_vec4(&b, dst, dst, dst, dst), nir_ssa_undef(&b, 1, 32), dcc_val,
+ nir_imm_int(&b, 0), .image_dim = dim);
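/* Each invocation relocates one DCC element: the load address follows the
 * tiled dcc_equation, the store address the display_dcc_equation, retiling
 * the metadata for display without touching the color surface itself. */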
return b.shader;
}
struct radv_meta_state *state = &device->meta_state;
for (unsigned i = 0; i < ARRAY_SIZE(state->dcc_retile.pipeline); i++) {
- radv_DestroyPipeline(radv_device_to_handle(device), state->dcc_retile.pipeline[i],
- &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->dcc_retile.pipeline[i], &state->alloc);
}
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->dcc_retile.p_layout,
- &state->alloc);
- device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->dcc_retile.ds_layout, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->dcc_retile.p_layout, &state->alloc);
+ device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->dcc_retile.ds_layout,
+ &state->alloc);
/* Reset for next finish. */
memset(&state->dcc_retile, 0, sizeof(state->dcc_retile));
VkResult result = VK_SUCCESS;
nir_shader *cs = build_dcc_retile_compute_shader(device, surf);
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]){
- {.binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- {.binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- }};
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
- &device->meta_state.alloc,
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
&device->meta_state.dcc_retile.ds_layout);
if (result != VK_SUCCESS)
goto cleanup;
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
};
- result =
- radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
- &device->meta_state.alloc, &device->meta_state.dcc_retile.p_layout);
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
+ &device->meta_state.dcc_retile.p_layout);
if (result != VK_SUCCESS)
goto cleanup;
.layout = device->meta_state.dcc_retile.p_layout,
};
- result = radv_compute_pipeline_create(
- radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, NULL,
- &device->meta_state.dcc_retile.pipeline[surf->u.gfx9.swizzle_mode]);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
+ NULL, &device->meta_state.dcc_retile.pipeline[surf->u.gfx9.swizzle_mode]);
if (result != VK_SUCCESS)
goto cleanup;
/* Compile pipelines if not already done. */
if (!cmd_buffer->device->meta_state.dcc_retile.pipeline[swizzle_mode]) {
- VkResult ret =
- radv_device_init_meta_dcc_retile_state(cmd_buffer->device, &image->planes[0].surface);
+ VkResult ret = radv_device_init_meta_dcc_retile_state(cmd_buffer->device, &image->planes[0].surface);
if (ret != VK_SUCCESS) {
vk_command_buffer_set_error(&cmd_buffer->vk, ret);
return;
}
}
- radv_meta_save(
- &saved_state, cmd_buffer,
- RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS);
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.dcc_retile.pipeline[swizzle_mode]);
for (unsigned i = 0; i < 2; ++i)
view_handles[i] = radv_buffer_view_to_handle(&views[i]);
- radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.dcc_retile.p_layout, 0, /* set */
+ radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.dcc_retile.p_layout,
+ 0, /* set */
2, /* descriptorWriteCount */
(VkWriteDescriptorSet[]){
{
unsigned height = DIV_ROUND_UP(image->vk.extent.height, vk_format_get_blockheight(image->vk.format));
unsigned dcc_width = DIV_ROUND_UP(width, image->planes[0].surface.u.gfx9.color.dcc_block_width);
- unsigned dcc_height =
- DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height);
+ unsigned dcc_height = DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height);
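/* Worked example: for a 1920x1080 image with 8x8 DCC blocks, this yields
 * dcc_width = DIV_ROUND_UP(1920, 8) = 240 and
 * dcc_height = DIV_ROUND_UP(1080, 8) = 135, i.e. one dispatched thread per
 * DCC block below. */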
uint32_t constants[] = {
image->planes[0].surface.u.gfx9.color.dcc_pitch_max + 1,
image->planes[0].surface.u.gfx9.color.display_dcc_pitch_max + 1,
image->planes[0].surface.u.gfx9.color.display_dcc_height,
};
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.dcc_retile.p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
- constants);
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.dcc_retile.p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, constants);
radv_unaligned_dispatch(cmd_buffer, dcc_width, dcc_height, 1);
radv_meta_restore(&saved_state, cmd_buffer);
- state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
+ state->flush_bits |=
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
}
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
- nir_ssa_def *block_size =
- nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
- b.shader->info.workgroup_size[2], 0);
+ nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
+ b.shader->info.workgroup_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- nir_ssa_def *data = nir_image_deref_load(
- &b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, global_id, nir_ssa_undef(&b, 1, 32),
- nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
+ nir_ssa_def *data =
+ nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, global_id,
+ nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
/* We need a NIR_SCOPE_DEVICE memory_scope because ACO will avoid
* creating a vmcnt(0) because it expects the L1 cache to keep memory
nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP, .memory_scope = NIR_SCOPE_DEVICE,
.memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id,
- nir_ssa_undef(&b, 1, 32), data, nir_imm_int(&b, 0),
- .image_dim = GLSL_SAMPLER_DIM_2D);
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id, nir_ssa_undef(&b, 1, 32), data,
+ nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
return b.shader;
}
VkResult result = VK_SUCCESS;
nir_shader *cs = build_expand_depth_stencil_compute_shader(device);
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]){
- {.binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- {.binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- }};
-
- result = radv_CreateDescriptorSetLayout(
- radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
- &device->meta_state.expand_depth_stencil_compute_ds_layout);
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
+ &device->meta_state.expand_depth_stencil_compute_ds_layout);
if (result != VK_SUCCESS)
goto cleanup;
.pPushConstantRanges = NULL,
};
- result = radv_CreatePipelineLayout(
- radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
- &device->meta_state.expand_depth_stencil_compute_p_layout);
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
+ &device->meta_state.expand_depth_stencil_compute_p_layout);
if (result != VK_SUCCESS)
goto cleanup;
.layout = device->meta_state.expand_depth_stencil_compute_p_layout,
};
- result = radv_CreateComputePipelines(
- radv_device_to_handle(device), device->meta_state.cache, 1,
- &vk_pipeline_info, NULL,
- &device->meta_state.expand_depth_stencil_compute_pipeline);
+ result = radv_CreateComputePipelines(radv_device_to_handle(device), device->meta_state.cache, 1, &vk_pipeline_info,
+ NULL, &device->meta_state.expand_depth_stencil_compute_pipeline);
if (result != VK_SUCCESS)
goto cleanup;
.pPushConstantRanges = NULL,
};
- return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
- &device->meta_state.alloc, layout);
+ return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, layout);
}
static VkResult
-create_pipeline(struct radv_device *device, uint32_t samples, VkPipelineLayout layout,
- enum radv_depth_op op, VkPipeline *pipeline)
+create_pipeline(struct radv_device *device, uint32_t samples, VkPipelineLayout layout, enum radv_depth_op op,
+ VkPipeline *pipeline)
{
VkResult result;
VkDevice device_h = radv_device_to_handle(device);
.resummarize_enable = op == DEPTH_RESUMMARIZE,
};
- result = radv_graphics_pipeline_create(device_h, device->meta_state.cache, &pipeline_create_info,
- &extra, &device->meta_state.alloc, pipeline);
+ result = radv_graphics_pipeline_create(device_h, device->meta_state.cache, &pipeline_create_info, &extra,
+ &device->meta_state.alloc, pipeline);
cleanup:
ralloc_free(fs_module);
struct radv_meta_state *state = &device->meta_state;
for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->depth_decomp[i].p_layout,
- &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->depth_decomp[i].p_layout, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->depth_decomp[i].decompress_pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->depth_decomp[i].resummarize_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->depth_decomp[i].decompress_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->depth_decomp[i].resummarize_pipeline, &state->alloc);
}
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->expand_depth_stencil_compute_pipeline, &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->expand_depth_stencil_compute_p_layout, &state->alloc);
- device->vk.dispatch_table.DestroyDescriptorSetLayout(
- radv_device_to_handle(device), state->expand_depth_stencil_compute_ds_layout, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->expand_depth_stencil_compute_pipeline, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->expand_depth_stencil_compute_p_layout,
+ &state->alloc);
+ device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->expand_depth_stencil_compute_ds_layout, &state->alloc);
}
VkResult
if (!state->depth_decomp[samples_log2].decompress_pipeline) {
VkResult ret;
- ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].p_layout,
- DEPTH_DECOMPRESS, &state->depth_decomp[samples_log2].decompress_pipeline);
+ ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].p_layout, DEPTH_DECOMPRESS,
+ &state->depth_decomp[samples_log2].decompress_pipeline);
if (ret != VK_SUCCESS) {
vk_command_buffer_set_error(&cmd_buffer->vk, ret);
return NULL;
}
- ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].p_layout,
- DEPTH_RESUMMARIZE, &state->depth_decomp[samples_log2].resummarize_pipeline);
+ ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].p_layout, DEPTH_RESUMMARIZE,
+ &state->depth_decomp[samples_log2].resummarize_pipeline);
if (ret != VK_SUCCESS) {
vk_command_buffer_set_error(&cmd_buffer->vk, ret);
return NULL;
const VkRenderingInfo rendering_info = {
.sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
- .renderArea = {
- .offset = { 0, 0 },
- .extent = { width, height }
- },
+ .renderArea = {.offset = {0, 0}, .extent = {width, height}},
.layerCount = 1,
.pDepthAttachment = &depth_att,
.pStencilAttachment = &stencil_att,
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
VkPipeline *pipeline;
- radv_meta_save(
- &saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_RENDER);
+ radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_RENDER);
pipeline = radv_get_depth_pipeline(cmd_buffer, image, subresourceRange, op);
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
- *pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
if (sample_locs) {
assert(image->vk.create_flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
* automatic layout transitions, otherwise the depth decompress
* pass uses the default HW locations.
*/
- radv_CmdSetSampleLocationsEXT(cmd_buffer_h,
- &(VkSampleLocationsInfoEXT){
- .sampleLocationsPerPixel = sample_locs->per_pixel,
- .sampleLocationGridSize = sample_locs->grid_size,
- .sampleLocationsCount = sample_locs->count,
- .pSampleLocations = sample_locs->locations,
- });
+ radv_CmdSetSampleLocationsEXT(cmd_buffer_h, &(VkSampleLocationsInfoEXT){
+ .sampleLocationsPerPixel = sample_locs->per_pixel,
+ .sampleLocationGridSize = sample_locs->grid_size,
+ .sampleLocationsCount = sample_locs->count,
+ .pSampleLocations = sample_locs->locations,
+ });
}
for (uint32_t l = 0; l < vk_image_subresource_level_count(&image->vk, subresourceRange); ++l) {
uint32_t width = radv_minify(image->vk.extent.width, subresourceRange->baseMipLevel + l);
uint32_t height = radv_minify(image->vk.extent.height, subresourceRange->baseMipLevel + l);
- radv_CmdSetViewport(cmd_buffer_h, 0, 1,
- &(VkViewport){.x = 0,
- .y = 0,
- .width = width,
- .height = height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f});
+ radv_CmdSetViewport(
+ cmd_buffer_h, 0, 1,
+ &(VkViewport){.x = 0, .y = 0, .width = width, .height = height, .minDepth = 0.0f, .maxDepth = 1.0f});
radv_CmdSetScissor(cmd_buffer_h, 0, 1,
&(VkRect2D){
assert(radv_image_is_tc_compat_htile(image));
- cmd_buffer->state.flush_bits |=
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
+ cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);
+ radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.expand_depth_stencil_compute_pipeline);
height = radv_minify(image->vk.extent.height, subresourceRange->baseMipLevel + l);
for (uint32_t s = 0; s < vk_image_subresource_layer_count(&image->vk, subresourceRange); s++) {
- radv_image_view_init(
- &load_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo){
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = VK_IMAGE_VIEW_TYPE_2D,
- .format = image->vk.format,
- .subresourceRange = {.aspectMask = subresourceRange->aspectMask,
- .baseMipLevel = subresourceRange->baseMipLevel + l,
- .levelCount = 1,
- .baseArrayLayer = subresourceRange->baseArrayLayer + s,
- .layerCount = 1},
- },
- 0, &(struct radv_image_view_extra_create_info){.enable_compression = true});
- radv_image_view_init(
- &store_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo){
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = VK_IMAGE_VIEW_TYPE_2D,
- .format = image->vk.format,
- .subresourceRange = {.aspectMask = subresourceRange->aspectMask,
- .baseMipLevel = subresourceRange->baseMipLevel + l,
- .levelCount = 1,
- .baseArrayLayer = subresourceRange->baseArrayLayer + s,
- .layerCount = 1},
- },
- 0, &(struct radv_image_view_extra_create_info){.disable_compression = true});
+ radv_image_view_init(&load_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = image->vk.format,
+ .subresourceRange = {.aspectMask = subresourceRange->aspectMask,
+ .baseMipLevel = subresourceRange->baseMipLevel + l,
+ .levelCount = 1,
+ .baseArrayLayer = subresourceRange->baseArrayLayer + s,
+ .layerCount = 1},
+ },
+ 0, &(struct radv_image_view_extra_create_info){.enable_compression = true});
+ radv_image_view_init(&store_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = image->vk.format,
+ .subresourceRange = {.aspectMask = subresourceRange->aspectMask,
+ .baseMipLevel = subresourceRange->baseMipLevel + l,
+ .levelCount = 1,
+ .baseArrayLayer = subresourceRange->baseArrayLayer + s,
+ .layerCount = 1},
+ },
+ 0, &(struct radv_image_view_extra_create_info){.disable_compression = true});
radv_meta_push_descriptor_set(
- cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.expand_depth_stencil_compute_p_layout, 0, /* set */
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.expand_depth_stencil_compute_p_layout,
+ 0, /* set */
2, /* descriptorWriteCount */
(VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 0,
radv_meta_restore(&saved_state, cmd_buffer);
- cmd_buffer->state.flush_bits |=
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
/* Initialize the HTILE metadata as "fully expanded". */
uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, image);
nir_ssa_def *chan = cnt == 1 ? src : nir_channel(b, src, i);
for (unsigned j = 0; j < 4; ++j)
intermediate[j] = nir_ubfe_imm(b, chan, 8 * j, 8);
- v[i] = nir_ior(
- b, nir_ior(b, nir_ishl_imm(b, intermediate[0], 24), nir_ishl_imm(b, intermediate[1], 16)),
- nir_ior(b, nir_ishl_imm(b, intermediate[2], 8), nir_ishl_imm(b, intermediate[3], 0)));
+ v[i] = nir_ior(b, nir_ior(b, nir_ishl_imm(b, intermediate[0], 24), nir_ishl_imm(b, intermediate[1], 16)),
+ nir_ior(b, nir_ishl_imm(b, intermediate[2], 8), nir_ishl_imm(b, intermediate[3], 0)));
}
return cnt == 1 ? v[0] : nir_vec(b, v, cnt);
}
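/* A CPU-side sketch (hypothetical helper, for illustration only) of the
 * per-channel byteswap the flip_endian() builder above emits: bytes are
 * extracted from the LSB upwards and reassembled MSB-first. */
static inline uint32_t
bswap32_sketch(uint32_t x)
{
return ((x & 0x000000ffu) << 24) | ((x & 0x0000ff00u) << 8) |
((x & 0x00ff0000u) >> 8) | (x >> 24);
}
/* e.g. bswap32_sketch(0x11223344) == 0x44332211 */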
static nir_ssa_def *
etc1_color_modifier_lookup(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
{
- const unsigned table[8][2] = {{2, 8}, {5, 17}, {9, 29}, {13, 42},
- {18, 60}, {24, 80}, {33, 106}, {47, 183}};
+ const unsigned table[8][2] = {{2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183}};
nir_ssa_def *upper = nir_ieq_imm(b, y, 1);
nir_ssa_def *result = NULL;
for (unsigned i = 0; i < 8; ++i) {
- nir_ssa_def *tmp =
- nir_bcsel(b, upper, nir_imm_int(b, table[i][1]), nir_imm_int(b, table[i][0]));
+ nir_ssa_def *tmp = nir_bcsel(b, upper, nir_imm_int(b, table[i][1]), nir_imm_int(b, table[i][0]));
if (result)
result = nir_bcsel(b, nir_ieq_imm(b, x, i), tmp, result);
else
}
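/* Worked example: x = 2 selects table[2] = {9, 29}, and y = 1 picks the
 * upper entry, so the bcsel chain above folds to 29. */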
static nir_ssa_def *
-decode_etc2_alpha(struct nir_builder *b, nir_ssa_def *alpha_payload, nir_ssa_def *linear_pixel,
- bool eac, nir_ssa_def *is_signed)
+decode_etc2_alpha(struct nir_builder *b, nir_ssa_def *alpha_payload, nir_ssa_def *linear_pixel, bool eac,
+ nir_ssa_def *is_signed)
{
alpha_payload = flip_endian(b, alpha_payload, 2);
nir_ssa_def *alpha_x = nir_channel(b, alpha_payload, 1);
multiplier = nir_imax(b, nir_imul_imm(b, multiplier, 8), nir_imm_int(b, 1));
}
- nir_ssa_def *lsb_index =
- nir_ubfe(b, nir_bcsel(b, nir_uge_imm(b, bit_offset, 32), alpha_y, alpha_x),
- nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 2));
+ nir_ssa_def *lsb_index = nir_ubfe(b, nir_bcsel(b, nir_uge_imm(b, bit_offset, 32), alpha_y, alpha_x),
+ nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 2));
bit_offset = nir_iadd_imm(b, bit_offset, 2);
- nir_ssa_def *msb =
- nir_ubfe(b, nir_bcsel(b, nir_uge_imm(b, bit_offset, 32), alpha_y, alpha_x),
- nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 1));
- nir_ssa_def *mod =
- nir_ixor(b, etc1_alpha_modifier_lookup(b, table, lsb_index), nir_iadd_imm(b, msb, -1));
+ nir_ssa_def *msb = nir_ubfe(b, nir_bcsel(b, nir_uge_imm(b, bit_offset, 32), alpha_y, alpha_x),
+ nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 1));
+ nir_ssa_def *mod = nir_ixor(b, etc1_alpha_modifier_lookup(b, table, lsb_index), nir_iadd_imm(b, msb, -1));
nir_ssa_def *a = nir_iadd(b, base, nir_imul(b, mod, multiplier));
nir_ssa_def *low_bound = nir_imm_int(b, 0);
if (eac) {
low_bound = nir_bcsel(b, is_signed, nir_imm_int(b, -1023), low_bound);
high_bound = nir_bcsel(b, is_signed, nir_imm_int(b, 1023), nir_imm_int(b, 2047));
- final_mult =
- nir_bcsel(b, is_signed, nir_imm_float(b, 1 / 1023.0), nir_imm_float(b, 1 / 2047.0));
+ final_mult = nir_bcsel(b, is_signed, nir_imm_float(b, 1 / 1023.0), nir_imm_float(b, 1 / 2047.0));
}
return nir_fmul(b, nir_i2f32(b, nir_iclamp(b, a, low_bound, high_bound)), final_mult);
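/* Compute shader that decodes one texel per invocation: fetch the compressed
 * block that contains the texel from a uint view of the source image, decode
 * it and store the result to the uncompressed destination. */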
static nir_shader *
build_shader(struct radv_device *dev)
{
- const struct glsl_type *sampler_type_2d =
- glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, true, GLSL_TYPE_UINT);
- const struct glsl_type *sampler_type_3d =
- glsl_sampler_type(GLSL_SAMPLER_DIM_3D, false, false, GLSL_TYPE_UINT);
- const struct glsl_type *img_type_2d =
- glsl_image_type(GLSL_SAMPLER_DIM_2D, true, GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type_3d =
- glsl_image_type(GLSL_SAMPLER_DIM_3D, false, GLSL_TYPE_FLOAT);
+ const struct glsl_type *sampler_type_2d = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, true, GLSL_TYPE_UINT);
+ const struct glsl_type *sampler_type_3d = glsl_sampler_type(GLSL_SAMPLER_DIM_3D, false, false, GLSL_TYPE_UINT);
+ const struct glsl_type *img_type_2d = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, GLSL_TYPE_FLOAT);
+ const struct glsl_type *img_type_3d = glsl_image_type(GLSL_SAMPLER_DIM_3D, false, GLSL_TYPE_FLOAT);
nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_decode_etc");
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
- nir_variable *input_img_2d =
- nir_variable_create(b.shader, nir_var_uniform, sampler_type_2d, "s_tex_2d");
+ nir_variable *input_img_2d = nir_variable_create(b.shader, nir_var_uniform, sampler_type_2d, "s_tex_2d");
input_img_2d->data.descriptor_set = 0;
input_img_2d->data.binding = 0;
- nir_variable *input_img_3d =
- nir_variable_create(b.shader, nir_var_uniform, sampler_type_3d, "s_tex_3d");
+ nir_variable *input_img_3d = nir_variable_create(b.shader, nir_var_uniform, sampler_type_3d, "s_tex_3d");
input_img_3d->data.descriptor_set = 0;
input_img_3d->data.binding = 0;
- nir_variable *output_img_2d =
- nir_variable_create(b.shader, nir_var_image, img_type_2d, "out_img_2d");
+ nir_variable *output_img_2d = nir_variable_create(b.shader, nir_var_image, img_type_2d, "out_img_2d");
output_img_2d->data.descriptor_set = 0;
output_img_2d->data.binding = 1;
- nir_variable *output_img_3d =
- nir_variable_create(b.shader, nir_var_image, img_type_3d, "out_img_3d");
+ nir_variable *output_img_3d = nir_variable_create(b.shader, nir_var_image, img_type_3d, "out_img_3d");
output_img_3d->data.descriptor_set = 0;
output_img_3d->data.binding = 1;
nir_ssa_def *global_id = get_global_ids(&b, 3);
nir_ssa_def *consts = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
- nir_ssa_def *consts2 =
- nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
+ nir_ssa_def *consts2 = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
nir_ssa_def *offset = nir_channels(&b, consts, 7);
nir_ssa_def *format = nir_channel(&b, consts, 3);
nir_ssa_def *image_type = nir_channel(&b, consts2, 0);
nir_ssa_def *is_3d = nir_ieq_imm(&b, image_type, VK_IMAGE_TYPE_3D);
nir_ssa_def *coord = nir_iadd(&b, global_id, offset);
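/* Each 4x4 block occupies one texel in the compressed view, so the block to
 * fetch is the destination texel coordinate divided by four. */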
- nir_ssa_def *src_coord =
- nir_vec3(&b, nir_ushr_imm(&b, nir_channel(&b, coord, 0), 2),
- nir_ushr_imm(&b, nir_channel(&b, coord, 1), 2), nir_channel(&b, coord, 2));
+ nir_ssa_def *src_coord = nir_vec3(&b, nir_ushr_imm(&b, nir_channel(&b, coord, 0), 2),
+ nir_ushr_imm(&b, nir_channel(&b, coord, 1), 2), nir_channel(&b, coord, 2));
- nir_variable *payload_var =
- nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "payload");
+ nir_variable *payload_var = nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "payload");
nir_push_if(&b, is_3d);
{
- nir_ssa_def *color = nir_txf_deref(&b, nir_build_deref_var(&b, input_img_3d), src_coord,
- nir_imm_int(&b, 0));
+ nir_ssa_def *color = nir_txf_deref(&b, nir_build_deref_var(&b, input_img_3d), src_coord, nir_imm_int(&b, 0));
nir_store_var(&b, payload_var, color, 0xf);
}
nir_push_else(&b, NULL);
{
- nir_ssa_def *color = nir_txf_deref(&b, nir_build_deref_var(&b, input_img_2d), src_coord,
- nir_imm_int(&b, 0));
+ nir_ssa_def *color = nir_txf_deref(&b, nir_build_deref_var(&b, input_img_2d), src_coord, nir_imm_int(&b, 0));
nir_store_var(&b, payload_var, color, 0xf);
}
nir_pop_if(&b, NULL);
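/* ETC stores texels within a block in column-major order, so the linear
 * pixel index is x * 4 + y. */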
nir_ssa_def *pixel_coord = nir_iand_imm(&b, nir_channels(&b, coord, 3), 3);
- nir_ssa_def *linear_pixel = nir_iadd(&b, nir_imul_imm(&b, nir_channel(&b, pixel_coord, 0), 4),
- nir_channel(&b, pixel_coord, 1));
+ nir_ssa_def *linear_pixel =
+ nir_iadd(&b, nir_imul_imm(&b, nir_channel(&b, pixel_coord, 0), 4), nir_channel(&b, pixel_coord, 1));
nir_ssa_def *payload = nir_load_var(&b, payload_var);
- nir_variable *color =
- nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "color");
+ nir_variable *color = nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "color");
nir_store_var(&b, color, nir_imm_vec4(&b, 1.0, 0.0, 0.0, 1.0), 0xf);
nir_push_if(&b, nir_ilt_imm(&b, format, VK_FORMAT_EAC_R11_UNORM_BLOCK));
{
- nir_ssa_def *alpha_bits_8 =
- nir_ige_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK);
- nir_ssa_def *alpha_bits_1 =
- nir_iand(&b, nir_ige_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK),
- nir_ilt_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK));
+ nir_ssa_def *alpha_bits_8 = nir_ige_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK);
+ nir_ssa_def *alpha_bits_1 = nir_iand(&b, nir_ige_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK),
+ nir_ilt_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK));
nir_ssa_def *color_payload =
nir_bcsel(&b, alpha_bits_8, nir_channels(&b, payload, 0xC), nir_channels(&b, payload, 3));
nir_ssa_def *color_y = nir_channel(&b, color_payload, 0);
nir_ssa_def *color_x = nir_channel(&b, color_payload, 1);
nir_ssa_def *flip = nir_test_mask(&b, color_y, 1);
- nir_ssa_def *subblock = nir_ushr_imm(
- &b, nir_bcsel(&b, flip, nir_channel(&b, pixel_coord, 1), nir_channel(&b, pixel_coord, 0)),
- 1);
-
- nir_variable *punchthrough =
- nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "punchthrough");
- nir_ssa_def *punchthrough_init =
- nir_iand(&b, alpha_bits_1, nir_inot(&b, nir_test_mask(&b, color_y, 2)));
+ nir_ssa_def *subblock =
+ nir_ushr_imm(&b, nir_bcsel(&b, flip, nir_channel(&b, pixel_coord, 1), nir_channel(&b, pixel_coord, 0)), 1);
+
+ nir_variable *punchthrough = nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "punchthrough");
+ nir_ssa_def *punchthrough_init = nir_iand(&b, alpha_bits_1, nir_inot(&b, nir_test_mask(&b, color_y, 2)));
nir_store_var(&b, punchthrough, punchthrough_init, 0x1);
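/* Punchthrough alpha: when the opaque bit of an ETC2_R8G8B8A1 block is
 * clear, pixels with index 2 (msb set, lsb clear) become transparent black.
 * Track that state and apply it after the RGB decode. */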
- nir_variable *etc1_compat =
- nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "etc1_compat");
+ nir_variable *etc1_compat = nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "etc1_compat");
nir_store_var(&b, etc1_compat, nir_imm_false(&b), 0x1);
nir_variable *alpha_result =
nir_variable_create(b.shader, nir_var_shader_temp, glsl_float_type(), "alpha_result");
nir_push_if(&b, alpha_bits_8);
{
- nir_store_var(
- &b, alpha_result,
- decode_etc2_alpha(&b, nir_channels(&b, payload, 3), linear_pixel, false, NULL), 1);
+ nir_store_var(&b, alpha_result, decode_etc2_alpha(&b, nir_channels(&b, payload, 3), linear_pixel, false, NULL),
+ 1);
}
nir_push_else(&b, NULL);
{
nir_pop_if(&b, NULL);
const struct glsl_type *uvec3_type = glsl_vector_type(GLSL_TYPE_UINT, 3);
- nir_variable *rgb_result =
- nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, "rgb_result");
- nir_variable *base_rgb =
- nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, "base_rgb");
+ nir_variable *rgb_result = nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, "rgb_result");
+ nir_variable *base_rgb = nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, "base_rgb");
nir_store_var(&b, rgb_result, nir_imm_ivec3(&b, 255, 0, 0), 0x7);
- nir_ssa_def *msb =
- nir_iand_imm(&b, nir_ushr(&b, color_x, nir_iadd_imm(&b, linear_pixel, 15)), 2);
+ nir_ssa_def *msb = nir_iand_imm(&b, nir_ushr(&b, color_x, nir_iadd_imm(&b, linear_pixel, 15)), 2);
nir_ssa_def *lsb = nir_iand_imm(&b, nir_ushr(&b, color_x, linear_pixel), 1);
- nir_push_if(
- &b, nir_iand(&b, nir_inot(&b, alpha_bits_1), nir_inot(&b, nir_test_mask(&b, color_y, 2))));
+ nir_push_if(&b, nir_iand(&b, nir_inot(&b, alpha_bits_1), nir_inot(&b, nir_test_mask(&b, color_y, 2))));
{
nir_store_var(&b, etc1_compat, nir_imm_true(&b), 1);
nir_ssa_def *tmp[3];
for (unsigned i = 0; i < 3; ++i)
tmp[i] = etc_extend(
&b,
- nir_iand_imm(&b,
- nir_ushr(&b, color_y,
- nir_isub_imm(&b, 28 - 8 * i, nir_imul_imm(&b, subblock, 4))),
+ nir_iand_imm(&b, nir_ushr(&b, color_y, nir_isub_imm(&b, 28 - 8 * i, nir_imul_imm(&b, subblock, 4))),
0xf),
4);
nir_store_var(&b, base_rgb, nir_vec(&b, tmp, 3), 0x7);
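/* In differential mode an out-of-range red sum selects ETC2 T-mode: two
 * RGB444 base colors and a distance-table index define the four paint
 * colors. */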
nir_push_if(&b, nir_ugt_imm(&b, r1, 31));
{
- nir_ssa_def *r0 = nir_ior(&b, nir_ubfe_imm(&b, color_y, 24, 2),
- nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 27, 2), 2));
+ nir_ssa_def *r0 =
+ nir_ior(&b, nir_ubfe_imm(&b, color_y, 24, 2), nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 27, 2), 2));
nir_ssa_def *g0 = nir_ubfe_imm(&b, color_y, 20, 4);
nir_ssa_def *b0 = nir_ubfe_imm(&b, color_y, 16, 4);
nir_ssa_def *r2 = nir_ubfe_imm(&b, color_y, 12, 4);
nir_ssa_def *g2 = nir_ubfe_imm(&b, color_y, 8, 4);
nir_ssa_def *b2 = nir_ubfe_imm(&b, color_y, 4, 4);
- nir_ssa_def *da = nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 2), 1),
- nir_iand_imm(&b, color_y, 1));
+ nir_ssa_def *da =
+ nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 2), 1), nir_iand_imm(&b, color_y, 1));
nir_ssa_def *dist = etc2_distance_lookup(&b, da);
nir_ssa_def *index = nir_ior(&b, lsb, msb);
nir_store_var(&b, punchthrough,
- nir_iand(&b, nir_load_var(&b, punchthrough),
- nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)),
+ nir_iand(&b, nir_load_var(&b, punchthrough), nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)),
0x1);
nir_push_if(&b, nir_ieq_imm(&b, index, 0));
{
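/* H-mode (out-of-range green sum): two RGB444 base colors, a distance index
 * stitched from spare bits, and the ordering of the packed base colors
 * supplies the distance LSB. */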
nir_ssa_def *r0 = nir_ubfe_imm(&b, color_y, 27, 4);
nir_ssa_def *g0 = nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 24, 3), 1),
nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 20), 1));
- nir_ssa_def *b0 = nir_ior(&b, nir_ubfe_imm(&b, color_y, 15, 3),
- nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 16), 8));
+ nir_ssa_def *b0 =
+ nir_ior(&b, nir_ubfe_imm(&b, color_y, 15, 3), nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 16), 8));
nir_ssa_def *r2 = nir_ubfe_imm(&b, color_y, 11, 4);
nir_ssa_def *g2 = nir_ubfe_imm(&b, color_y, 7, 4);
nir_ssa_def *b2 = nir_ubfe_imm(&b, color_y, 3, 4);
nir_ssa_def *da = nir_iand_imm(&b, color_y, 4);
nir_ssa_def *db = nir_iand_imm(&b, color_y, 1);
nir_ssa_def *d = nir_iadd(&b, da, nir_imul_imm(&b, db, 2));
- nir_ssa_def *d0 =
- nir_iadd(&b, nir_ishl_imm(&b, r0, 16), nir_iadd(&b, nir_ishl_imm(&b, g0, 8), b0));
- nir_ssa_def *d2 =
- nir_iadd(&b, nir_ishl_imm(&b, r2, 16), nir_iadd(&b, nir_ishl_imm(&b, g2, 8), b2));
+ nir_ssa_def *d0 = nir_iadd(&b, nir_ishl_imm(&b, r0, 16), nir_iadd(&b, nir_ishl_imm(&b, g0, 8), b0));
+ nir_ssa_def *d2 = nir_iadd(&b, nir_ishl_imm(&b, r2, 16), nir_iadd(&b, nir_ishl_imm(&b, g2, 8), b2));
d = nir_bcsel(&b, nir_uge(&b, d0, d2), nir_iadd_imm(&b, d, 1), d);
nir_ssa_def *dist = etc2_distance_lookup(&b, d);
- nir_ssa_def *base = nir_bcsel(&b, nir_ine_imm(&b, msb, 0), nir_vec3(&b, r2, g2, b2),
- nir_vec3(&b, r0, g0, b0));
+ nir_ssa_def *base =
+ nir_bcsel(&b, nir_ine_imm(&b, msb, 0), nir_vec3(&b, r2, g2, b2), nir_vec3(&b, r0, g0, b0));
base = etc_extend(&b, base, 4);
- base = nir_iadd(&b, base,
- nir_imul(&b, dist, nir_isub_imm(&b, 1, nir_imul_imm(&b, lsb, 2))));
+ base = nir_iadd(&b, base, nir_imul(&b, dist, nir_isub_imm(&b, 1, nir_imul_imm(&b, lsb, 2))));
nir_store_var(&b, rgb_result, base, 0x7);
nir_store_var(&b, punchthrough,
- nir_iand(&b, nir_load_var(&b, punchthrough),
- nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)),
+ nir_iand(&b, nir_load_var(&b, punchthrough), nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)),
0x1);
}
nir_push_else(&b, NULL);
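/* Planar mode (out-of-range blue sum): an RGB676 origin color plus
 * horizontal and vertical gradient colors, interpolated across the 4x4
 * block. */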
nir_push_if(&b, nir_ugt_imm(&b, b1, 31));
{
nir_ssa_def *r0 = nir_ubfe_imm(&b, color_y, 25, 6);
- nir_ssa_def *g0 = nir_ior(&b, nir_ubfe_imm(&b, color_y, 17, 6),
- nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 18), 0x40));
- nir_ssa_def *b0 =
- nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 11, 2), 3),
- nir_ior(&b, nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 11), 0x20),
- nir_ubfe_imm(&b, color_y, 7, 3)));
- nir_ssa_def *rh = nir_ior(&b, nir_iand_imm(&b, color_y, 1),
- nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 5), 1));
+ nir_ssa_def *g0 =
+ nir_ior(&b, nir_ubfe_imm(&b, color_y, 17, 6), nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 18), 0x40));
+ nir_ssa_def *b0 = nir_ior(
+ &b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 11, 2), 3),
+ nir_ior(&b, nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 11), 0x20), nir_ubfe_imm(&b, color_y, 7, 3)));
+ nir_ssa_def *rh =
+ nir_ior(&b, nir_iand_imm(&b, color_y, 1), nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 5), 1));
nir_ssa_def *rv = nir_ubfe_imm(&b, color_x, 13, 6);
nir_ssa_def *gh = nir_ubfe_imm(&b, color_x, 25, 7);
nir_ssa_def *gv = nir_ubfe_imm(&b, color_x, 6, 7);
bv = etc_extend(&b, bv, 6);
nir_ssa_def *rgb = nir_vec3(&b, r0, g0, b0);
- nir_ssa_def *dx = nir_imul(&b, nir_isub(&b, nir_vec3(&b, rh, gh, bh), rgb),
- nir_channel(&b, pixel_coord, 0));
- nir_ssa_def *dy = nir_imul(&b, nir_isub(&b, nir_vec3(&b, rv, gv, bv), rgb),
- nir_channel(&b, pixel_coord, 1));
+ nir_ssa_def *dx =
+ nir_imul(&b, nir_isub(&b, nir_vec3(&b, rh, gh, bh), rgb), nir_channel(&b, pixel_coord, 0));
+ nir_ssa_def *dy =
+ nir_imul(&b, nir_isub(&b, nir_vec3(&b, rv, gv, bv), rgb), nir_channel(&b, pixel_coord, 1));
rgb = nir_iadd(&b, rgb, nir_ishr_imm(&b, nir_iadd_imm(&b, nir_iadd(&b, dx, dy), 2), 2));
nir_store_var(&b, rgb_result, rgb, 0x7);
nir_store_var(&b, punchthrough, nir_imm_false(&b), 0x1);
nir_pop_if(&b, NULL);
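/* Individual and differential blocks share the ETC1 path: a per-subblock
 * base color offset by a signed modifier from the codeword table. */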
nir_push_if(&b, nir_load_var(&b, etc1_compat));
{
- nir_ssa_def *etc1_table_index = nir_ubfe(
- &b, color_y, nir_isub_imm(&b, 5, nir_imul_imm(&b, subblock, 3)), nir_imm_int(&b, 3));
+ nir_ssa_def *etc1_table_index =
+ nir_ubfe(&b, color_y, nir_isub_imm(&b, 5, nir_imul_imm(&b, subblock, 3)), nir_imm_int(&b, 3));
nir_ssa_def *sgn = nir_isub_imm(&b, 1, msb);
sgn = nir_bcsel(&b, nir_load_var(&b, punchthrough), nir_imul(&b, sgn, lsb), sgn);
nir_store_var(&b, punchthrough,
- nir_iand(&b, nir_load_var(&b, punchthrough),
- nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)),
- 0x1);
- nir_ssa_def *off =
- nir_imul(&b, etc1_color_modifier_lookup(&b, etc1_table_index, lsb), sgn);
+ nir_iand(&b, nir_load_var(&b, punchthrough), nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)), 0x1);
+ nir_ssa_def *off = nir_imul(&b, etc1_color_modifier_lookup(&b, etc1_table_index, lsb), sgn);
nir_ssa_def *result = nir_iadd(&b, nir_load_var(&b, base_rgb), off);
nir_store_var(&b, rgb_result, result, 0x7);
}
nir_pop_if(&b, NULL);
nir_ssa_def *col[4];
for (unsigned i = 0; i < 3; ++i)
- col[i] = nir_fdiv_imm(&b, nir_i2f32(&b, nir_channel(&b, nir_load_var(&b, rgb_result), i)),
- 255.0);
+ col[i] = nir_fdiv_imm(&b, nir_i2f32(&b, nir_channel(&b, nir_load_var(&b, rgb_result), i)), 255.0);
col[3] = nir_load_var(&b, alpha_result);
nir_store_var(&b, color, nir_vec(&b, col, 4), 0xf);
}
nir_ieq_imm(&b, format, VK_FORMAT_EAC_R11G11_SNORM_BLOCK));
nir_ssa_def *val[4];
for (int i = 0; i < 2; ++i) {
- val[i] = decode_etc2_alpha(&b, nir_channels(&b, payload, 3 << (2 * i)), linear_pixel, true,
- is_signed);
+ val[i] = decode_etc2_alpha(&b, nir_channels(&b, payload, 3 << (2 * i)), linear_pixel, true, is_signed);
}
val[2] = nir_imm_float(&b, 0.0);
val[3] = nir_imm_float(&b, 1.0);
nir_push_if(&b, is_3d);
{
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_3d)->dest.ssa, img_coord,
- nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
- .image_dim = GLSL_SAMPLER_DIM_3D);
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_3d)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32),
+ outval, nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_3D);
}
nir_push_else(&b, NULL);
{
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_2d)->dest.ssa, img_coord,
- nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
- .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_2d)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32),
+ outval, nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
}
nir_pop_if(&b, NULL);
return b.shader;
create_layout(struct radv_device *device)
{
VkResult result;
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]){
- {.binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- {.binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- }};
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
- &device->meta_state.alloc,
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
&device->meta_state.etc_decode.ds_layout);
if (result != VK_SUCCESS)
goto fail;
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
};
- result =
- radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
- &device->meta_state.alloc, &device->meta_state.etc_decode.p_layout);
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
+ &device->meta_state.etc_decode.p_layout);
if (result != VK_SUCCESS)
goto fail;
return VK_SUCCESS;
.layout = device->meta_state.resolve_compute.p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &vk_pipeline_info, NULL, pipeline);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
+ NULL, pipeline);
if (result != VK_SUCCESS)
goto fail;
{
struct radv_meta_state *state = &device->meta_state;
radv_DestroyPipeline(radv_device_to_handle(device), state->etc_decode.pipeline, &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->etc_decode.p_layout,
- &state->alloc);
- device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->etc_decode.ds_layout, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->etc_decode.p_layout, &state->alloc);
+ device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->etc_decode.ds_layout,
+ &state->alloc);
}
static VkPipeline
}
static void
-decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
- struct radv_image_view *dst_iview, const VkOffset3D *offset, const VkExtent3D *extent)
+decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview, struct radv_image_view *dst_iview,
+ const VkOffset3D *offset, const VkExtent3D *extent)
{
struct radv_device *device = cmd_buffer->device;
- radv_meta_push_descriptor_set(
- cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo =
- (VkDescriptorImageInfo[]){
- {.sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(src_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
- }},
- {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]){
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(dst_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }}});
+ radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.resolve_compute.p_layout, 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {.sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
+ }},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(dst_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
VkPipeline pipeline = radv_get_etc_decode_pipeline(cmd_buffer);
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
- pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
unsigned push_constants[5] = {
offset->x, offset->y, offset->z, src_iview->image->vk.format, src_iview->image->vk.image_type,
};
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.resolve_compute.p_layout, VK_SHADER_STAGE_COMPUTE_BIT,
- 0, 20, push_constants);
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.resolve_compute.p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, 20, push_constants);
radv_unaligned_dispatch(cmd_buffer, extent->width, extent->height, extent->depth);
}
void
-radv_meta_decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
- VkImageLayout layout, const VkImageSubresourceLayers *subresource,
- VkOffset3D offset, VkExtent3D extent)
+radv_meta_decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout layout,
+ const VkImageSubresourceLayers *subresource, VkOffset3D offset, VkExtent3D extent)
{
struct radv_meta_saved_state saved_state;
radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS | RADV_META_SUSPEND_PREDICATING);
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS |
+ RADV_META_SUSPEND_PREDICATING);
uint32_t base_slice = radv_meta_get_iview_layer(image, subresource, &offset);
uint32_t slice_count = image->vk.image_type == VK_IMAGE_TYPE_3D ? extent.depth : subresource->layerCount;
extent = vk_image_sanitize_extent(&image->vk, extent);
offset = vk_image_sanitize_offset(&image->vk, offset);
- VkFormat load_format = vk_format_get_blocksize(image->vk.format) == 16
- ? VK_FORMAT_R32G32B32A32_UINT
- : VK_FORMAT_R32G32_UINT;
+ VkFormat load_format =
+ vk_format_get_blocksize(image->vk.format) == 16 ? VK_FORMAT_R32G32B32A32_UINT : VK_FORMAT_R32G32_UINT;
struct radv_image_view src_iview;
- radv_image_view_init(
- &src_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo){
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = radv_meta_get_view_type(image),
- .format = load_format,
- .subresourceRange =
- {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = subresource->mipLevel,
- .levelCount = 1,
- .baseArrayLayer = 0,
- .layerCount = subresource->baseArrayLayer + subresource->layerCount,
- },
- },
- 0, NULL);
+ radv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = radv_meta_get_view_type(image),
+ .format = load_format,
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = subresource->mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = subresource->baseArrayLayer + subresource->layerCount,
+ },
+ },
+ 0, NULL);
VkFormat store_format;
switch (image->vk.format) {
store_format = VK_FORMAT_R8G8B8A8_UNORM;
}
struct radv_image_view dst_iview;
- radv_image_view_init(
- &dst_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo){
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = radv_meta_get_view_type(image),
- .format = store_format,
- .subresourceRange =
- {
- .aspectMask = VK_IMAGE_ASPECT_PLANE_1_BIT,
- .baseMipLevel = subresource->mipLevel,
- .levelCount = 1,
- .baseArrayLayer = 0,
- .layerCount = subresource->baseArrayLayer + subresource->layerCount,
- },
- },
- 0, NULL);
+ radv_image_view_init(&dst_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = radv_meta_get_view_type(image),
+ .format = store_format,
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_PLANE_1_BIT,
+ .baseMipLevel = subresource->mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = subresource->baseArrayLayer + subresource->layerCount,
+ },
+ },
+ 0, NULL);
decode_etc(cmd_buffer, &src_iview, &dst_iview, &(VkOffset3D){offset.x, offset.y, base_slice},
&(VkExtent3D){extent.width, extent.height, slice_count});
output_img->data.binding = 1;
nir_ssa_def *global_id = get_global_ids(&b, 2);
- nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, global_id, 0),
- nir_channel(&b, global_id, 1),
- nir_ssa_undef(&b, 1, 32),
- nir_ssa_undef(&b, 1, 32));
+ nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, global_id, 0), nir_channel(&b, global_id, 1),
+ nir_ssa_undef(&b, 1, 32), nir_ssa_undef(&b, 1, 32));
- nir_ssa_def *data = nir_image_deref_load(
- &b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32),
- nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
+ nir_ssa_def *data =
+ nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, img_coord,
+ nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
/* We need a NIR_SCOPE_DEVICE memory_scope because ACO will avoid
* creating a vmcnt(0) because it expects the L1 cache to keep memory
* coherent across waves. */
nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP, .memory_scope = NIR_SCOPE_DEVICE,
.memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord,
- nir_ssa_undef(&b, 1, 32), data, nir_imm_int(&b, 0),
- .image_dim = GLSL_SAMPLER_DIM_2D);
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32), data,
+ nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
return b.shader;
}
VkResult result = VK_SUCCESS;
nir_shader *cs = build_dcc_decompress_compute_shader(device);
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]){
- {.binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- {.binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- }};
-
- result = radv_CreateDescriptorSetLayout(
- radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
- &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout);
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
+ &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout);
if (result != VK_SUCCESS)
goto cleanup;
.pPushConstantRanges = NULL,
};
- result = radv_CreatePipelineLayout(
- radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
- &device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout);
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
+ &device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout);
if (result != VK_SUCCESS)
goto cleanup;
.layout = device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
};
- result = radv_compute_pipeline_create(
- radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info, NULL,
- &device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
+ NULL, &device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
if (result != VK_SUCCESS)
goto cleanup;
.pPushConstantRanges = NULL,
};
- return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
- &device->meta_state.alloc, layout);
+ return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, layout);
}
static VkResult
.attachmentCount = 1,
.pAttachments = (VkPipelineColorBlendAttachmentState[]){
{
- .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT |
+ VK_COLOR_COMPONENT_A_BIT,
},
}};
const VkPipelineRasterizationStateCreateInfo rs_state = {
.pColorAttachmentFormats = &color_format,
};
- result = radv_graphics_pipeline_create(
- device_h, device->meta_state.cache,
- &(VkGraphicsPipelineCreateInfo){
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .pNext = &rendering_create_info,
- .stageCount = 2,
- .pStages = stages,
-
- .pVertexInputState = &vi_state,
- .pInputAssemblyState = &ia_state,
-
- .pViewportState =
- &(VkPipelineViewportStateCreateInfo){
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &rs_state,
- .pMultisampleState =
- &(VkPipelineMultisampleStateCreateInfo){
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1,
- .sampleShadingEnable = false,
- .pSampleMask = NULL,
- .alphaToCoverageEnable = false,
- .alphaToOneEnable = false,
- },
- .pColorBlendState = &blend_state,
- .pDynamicState =
- &(VkPipelineDynamicStateCreateInfo){
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 2,
- .pDynamicStates =
- (VkDynamicState[]){
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- },
- },
- .layout = layout,
- .renderPass = VK_NULL_HANDLE,
- .subpass = 0,
- },
- &(struct radv_graphics_pipeline_create_info){
- .use_rectlist = true,
- .custom_blend_mode = V_028808_CB_ELIMINATE_FAST_CLEAR,
- },
- &device->meta_state.alloc, &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline);
+ result = radv_graphics_pipeline_create(device_h, device->meta_state.cache,
+ &(VkGraphicsPipelineCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .pNext = &rendering_create_info,
+ .stageCount = 2,
+ .pStages = stages,
+
+ .pVertexInputState = &vi_state,
+ .pInputAssemblyState = &ia_state,
+
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState = &rs_state,
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = NULL,
+ .alphaToCoverageEnable = false,
+ .alphaToOneEnable = false,
+ },
+ .pColorBlendState = &blend_state,
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 2,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ },
+ },
+ .layout = layout,
+ .renderPass = VK_NULL_HANDLE,
+ .subpass = 0,
+ },
+ &(struct radv_graphics_pipeline_create_info){
+ .use_rectlist = true,
+ .custom_blend_mode = V_028808_CB_ELIMINATE_FAST_CLEAR,
+ },
+ &device->meta_state.alloc,
+ &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline);
if (result != VK_SUCCESS)
goto cleanup;
- result = radv_graphics_pipeline_create(
- device_h, device->meta_state.cache,
- &(VkGraphicsPipelineCreateInfo){
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .pNext = &rendering_create_info,
- .stageCount = 2,
- .pStages = stages,
-
- .pVertexInputState = &vi_state,
- .pInputAssemblyState = &ia_state,
-
- .pViewportState =
- &(VkPipelineViewportStateCreateInfo){
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &rs_state,
- .pMultisampleState =
- &(VkPipelineMultisampleStateCreateInfo){
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1,
- .sampleShadingEnable = false,
- .pSampleMask = NULL,
- .alphaToCoverageEnable = false,
- .alphaToOneEnable = false,
- },
- .pColorBlendState = &blend_state,
- .pDynamicState =
- &(VkPipelineDynamicStateCreateInfo){
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 2,
- .pDynamicStates =
- (VkDynamicState[]){
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- },
- },
- .layout = layout,
- .renderPass = VK_NULL_HANDLE,
- .subpass = 0,
- },
- &(struct radv_graphics_pipeline_create_info){
- .use_rectlist = true,
- .custom_blend_mode = V_028808_CB_FMASK_DECOMPRESS,
- },
- &device->meta_state.alloc, &device->meta_state.fast_clear_flush.fmask_decompress_pipeline);
+ result = radv_graphics_pipeline_create(device_h, device->meta_state.cache,
+ &(VkGraphicsPipelineCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .pNext = &rendering_create_info,
+ .stageCount = 2,
+ .pStages = stages,
+
+ .pVertexInputState = &vi_state,
+ .pInputAssemblyState = &ia_state,
+
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState = &rs_state,
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = NULL,
+ .alphaToCoverageEnable = false,
+ .alphaToOneEnable = false,
+ },
+ .pColorBlendState = &blend_state,
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 2,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ },
+ },
+ .layout = layout,
+ .renderPass = VK_NULL_HANDLE,
+ .subpass = 0,
+ },
+ &(struct radv_graphics_pipeline_create_info){
+ .use_rectlist = true,
+ .custom_blend_mode = V_028808_CB_FMASK_DECOMPRESS,
+ },
+ &device->meta_state.alloc,
+ &device->meta_state.fast_clear_flush.fmask_decompress_pipeline);
if (result != VK_SUCCESS)
goto cleanup;
},
&(struct radv_graphics_pipeline_create_info){
.use_rectlist = true,
- .custom_blend_mode = device->physical_device->rad_info.gfx_level >= GFX11
- ? V_028808_CB_DCC_DECOMPRESS_GFX11
- : V_028808_CB_DCC_DECOMPRESS_GFX8,
+ .custom_blend_mode = device->physical_device->rad_info.gfx_level >= GFX11 ? V_028808_CB_DCC_DECOMPRESS_GFX11
+ : V_028808_CB_DCC_DECOMPRESS_GFX8,
},
&device->meta_state.alloc, &device->meta_state.fast_clear_flush.dcc_decompress_pipeline);
if (result != VK_SUCCESS)
{
struct radv_meta_state *state = &device->meta_state;
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->fast_clear_flush.dcc_decompress_pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->fast_clear_flush.fmask_decompress_pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->fast_clear_flush.cmask_eliminate_pipeline, &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fast_clear_flush.p_layout,
- &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->fast_clear_flush.dcc_decompress_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->fast_clear_flush.fmask_decompress_pipeline,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->fast_clear_flush.cmask_eliminate_pipeline, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fast_clear_flush.p_layout, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->fast_clear_flush.dcc_decompress_compute_pipeline, &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->fast_clear_flush.dcc_decompress_compute_p_layout,
+ radv_DestroyPipeline(radv_device_to_handle(device), state->fast_clear_flush.dcc_decompress_compute_pipeline,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fast_clear_flush.dcc_decompress_compute_p_layout,
&state->alloc);
device->vk.dispatch_table.DestroyDescriptorSetLayout(
- radv_device_to_handle(device), state->fast_clear_flush.dcc_decompress_compute_ds_layout,
- &state->alloc);
+ radv_device_to_handle(device), state->fast_clear_flush.dcc_decompress_compute_ds_layout, &state->alloc);
}
static VkResult
}
static void
-radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image, uint64_t pred_offset,
- bool value)
+radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ uint64_t pred_offset, bool value)
{
uint64_t va = 0;
static void
radv_process_color_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
- const VkImageSubresourceRange *range, int level, int layer,
- bool flush_cb)
+ const VkImageSubresourceRange *range, int level, int layer, bool flush_cb)
{
struct radv_device *device = cmd_buffer->device;
struct radv_image_view iview;
const VkRenderingInfo rendering_info = {
.sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
- .renderArea = {
- .offset = { 0, 0 },
- .extent = { width, height }
- },
+ .renderArea = {.offset = {0, 0}, .extent = {width, height}},
.layerCount = 1,
.colorAttachmentCount = 1,
.pColorAttachments = &color_att,
radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info);
if (flush_cb)
- cmd_buffer->state.flush_bits |=
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image);
+ cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image);
radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
if (flush_cb)
- cmd_buffer->state.flush_bits |=
- radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image);
+ cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image);
radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer));
}
}
- radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_RENDER);
+ radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_RENDER);
if (pred_offset) {
pred_offset += 8 * subresourceRange->baseMipLevel;
cmd_buffer->state.predicating = true;
}
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
- *pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
for (uint32_t l = 0; l < vk_image_subresource_level_count(&image->vk, subresourceRange); ++l) {
uint32_t width, height;
width = radv_minify(image->vk.extent.width, subresourceRange->baseMipLevel + l);
height = radv_minify(image->vk.extent.height, subresourceRange->baseMipLevel + l);
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
- &(VkViewport){.x = 0,
- .y = 0,
- .width = width,
- .height = height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f});
+ radv_CmdSetViewport(
+ radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkViewport){.x = 0, .y = 0, .width = width, .height = height, .minDepth = 0.0f, .maxDepth = 1.0f});
radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
&(VkRect2D){
}
}
- cmd_buffer->state.flush_bits |=
- RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
if (pred_offset) {
pred_offset += 8 * subresourceRange->baseMipLevel;
if (cmd_buffer->state.predication_type != -1) {
/* Restore previous conditional rendering user state. */
- si_emit_set_predication_state(cmd_buffer, cmd_buffer->state.predication_type,
- cmd_buffer->state.predication_op,
+ si_emit_set_predication_state(cmd_buffer, cmd_buffer->state.predication_type, cmd_buffer->state.predication_op,
cmd_buffer->state.predication_va);
}
}
struct radv_image_view store_iview = {0};
struct radv_device *device = cmd_buffer->device;
- cmd_buffer->state.flush_bits |=
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
+ cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
if (!cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
VkResult ret = radv_device_init_meta_fast_clear_flush_state_internal(cmd_buffer->device);
}
}
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);
+ radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
height = radv_minify(image->vk.extent.height, subresourceRange->baseMipLevel + l);
for (uint32_t s = 0; s < vk_image_subresource_layer_count(&image->vk, subresourceRange); s++) {
- radv_image_view_init(
- &load_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo){
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = VK_IMAGE_VIEW_TYPE_2D,
- .format = image->vk.format,
- .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = subresourceRange->baseMipLevel + l,
- .levelCount = 1,
- .baseArrayLayer = subresourceRange->baseArrayLayer + s,
- .layerCount = 1},
- },
- 0, &(struct radv_image_view_extra_create_info){.enable_compression = true});
- radv_image_view_init(
- &store_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo){
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = VK_IMAGE_VIEW_TYPE_2D,
- .format = image->vk.format,
- .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = subresourceRange->baseMipLevel + l,
- .levelCount = 1,
- .baseArrayLayer = subresourceRange->baseArrayLayer + s,
- .layerCount = 1},
- },
- 0, &(struct radv_image_view_extra_create_info){.disable_compression = true});
+ radv_image_view_init(&load_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = image->vk.format,
+ .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = subresourceRange->baseMipLevel + l,
+ .levelCount = 1,
+ .baseArrayLayer = subresourceRange->baseArrayLayer + s,
+ .layerCount = 1},
+ },
+ 0, &(struct radv_image_view_extra_create_info){.enable_compression = true});
+ radv_image_view_init(&store_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = image->vk.format,
+ .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = subresourceRange->baseMipLevel + l,
+ .levelCount = 1,
+ .baseArrayLayer = subresourceRange->baseArrayLayer + s,
+ .layerCount = 1},
+ },
+ 0, &(struct radv_image_view_extra_create_info){.disable_compression = true});
radv_meta_push_descriptor_set(
cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout, 0, /* set */
- 2, /* descriptorWriteCount */
+ 2, /* descriptorWriteCount */
(VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 0,
.dstArrayElement = 0,
radv_meta_restore(&saved_state, cmd_buffer);
- cmd_buffer->state.flush_bits |=
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
/* Initialize the DCC metadata as "fully expanded". */
cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, image, subresourceRange, 0xffffffff);
const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT);
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_MS, false, GLSL_TYPE_FLOAT);
- nir_builder b =
- radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_fmask_copy_cs_-%d", samples);
+ nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_fmask_copy_cs_-%d", samples);
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
- nir_ssa_def *block_size =
- nir_imm_ivec3(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
- b.shader->info.workgroup_size[2]);
+ nir_ssa_def *block_size = nir_imm_ivec3(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
+ b.shader->info.workgroup_size[2]);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
/* Get coordinates. */
nir_ssa_def *src_coord = nir_trim_vector(&b, global_id, 2);
- nir_ssa_def *dst_coord = nir_vec4(&b, nir_channel(&b, src_coord, 0),
- nir_channel(&b, src_coord, 1),
- nir_ssa_undef(&b, 1, 32),
- nir_ssa_undef(&b, 1, 32));
+ nir_ssa_def *dst_coord = nir_vec4(&b, nir_channel(&b, src_coord, 0), nir_channel(&b, src_coord, 1),
+ nir_ssa_undef(&b, 1, 32), nir_ssa_undef(&b, 1, 32));
nir_tex_src frag_mask_srcs[] = {{
.src_type = nir_tex_src_coord,
.src = nir_src_for_ssa(src_coord),
}};
- nir_ssa_def *frag_mask = nir_build_tex_deref_instr(&b, nir_texop_fragment_mask_fetch_amd,
- nir_build_deref_var(&b, input_img), NULL,
- ARRAY_SIZE(frag_mask_srcs), frag_mask_srcs);
+ nir_ssa_def *frag_mask =
+ nir_build_tex_deref_instr(&b, nir_texop_fragment_mask_fetch_amd, nir_build_deref_var(&b, input_img), NULL,
+ ARRAY_SIZE(frag_mask_srcs), frag_mask_srcs);
/* Get the maximum sample used in this fragment. */
nir_ssa_def *max_sample_index = nir_imm_int(&b, 0);
for (uint32_t s = 0; s < samples; s++) {
/* max_sample_index = MAX2(max_sample_index, (frag_mask >> (s * 4)) & 0xf) */
max_sample_index = nir_umax(&b, max_sample_index,
- nir_ubitfield_extract(&b, frag_mask, nir_imm_int(&b, 4 * s),
- nir_imm_int(&b, 4)));
+ nir_ubitfield_extract(&b, frag_mask, nir_imm_int(&b, 4 * s), nir_imm_int(&b, 4)));
}
nir_variable *counter = nir_local_variable_create(b.impl, glsl_int_type(), "counter");
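/* Iterate over the sample IDs up to the highest one referenced by the
 * fragment mask and copy each fetched fragment to the destination. */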
nir_ssa_def *sample_id = nir_load_var(&b, counter);
nir_tex_src frag_fetch_srcs[] = {{
- .src_type = nir_tex_src_coord,
- .src = nir_src_for_ssa(src_coord),
- }, {
- .src_type = nir_tex_src_ms_index,
- .src = nir_src_for_ssa(sample_id),
- }};
- nir_ssa_def *outval = nir_build_tex_deref_instr(&b, nir_texop_fragment_fetch_amd,
- nir_build_deref_var(&b, input_img), NULL,
- ARRAY_SIZE(frag_fetch_srcs), frag_fetch_srcs);
-
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord,
- sample_id, outval, nir_imm_int(&b, 0),
- .image_dim = GLSL_SAMPLER_DIM_MS);
+ .src_type = nir_tex_src_coord,
+ .src = nir_src_for_ssa(src_coord),
+ },
+ {
+ .src_type = nir_tex_src_ms_index,
+ .src = nir_src_for_ssa(sample_id),
+ }};
+ nir_ssa_def *outval =
+ nir_build_tex_deref_instr(&b, nir_texop_fragment_fetch_amd, nir_build_deref_var(&b, input_img), NULL,
+ ARRAY_SIZE(frag_fetch_srcs), frag_fetch_srcs);
+
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord, sample_id, outval,
+ nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_MS);
radv_break_on_count(&b, counter, max_sample_index);
}
{
struct radv_meta_state *state = &device->meta_state;
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fmask_copy.p_layout,
- &state->alloc);
- device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->fmask_copy.ds_layout, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fmask_copy.p_layout, &state->alloc);
+ device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->fmask_copy.ds_layout,
+ &state->alloc);
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
radv_DestroyPipeline(radv_device_to_handle(device), state->fmask_copy.pipeline[i], &state->alloc);
.layout = state->fmask_copy.p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), state->cache,
- &vk_pipeline_info, NULL, pipeline);
+ result =
+ radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, &vk_pipeline_info, NULL, pipeline);
ralloc_free(cs);
return result;
}
.pImmutableSamplers = NULL},
}};
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
- &device->meta_state.alloc,
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
&device->meta_state.fmask_copy.ds_layout);
if (result != VK_SUCCESS)
return result;
}
if (!device->meta_state.fmask_copy.p_layout) {
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.fmask_copy.ds_layout,
- .pushConstantRangeCount = 0,
- .pPushConstantRanges = NULL};
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
- &device->meta_state.alloc,
+ VkPipelineLayoutCreateInfo pl_create_info = {.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.fmask_copy.ds_layout,
+ .pushConstantRangeCount = 0,
+ .pPushConstantRanges = NULL};
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
&device->meta_state.fmask_copy.p_layout);
if (result != VK_SUCCESS)
return result;
}
- return create_fmask_copy_pipeline(device, 1u << samples_log2,
- &device->meta_state.fmask_copy.pipeline[samples_log2]);
+ return create_fmask_copy_pipeline(device, 1u << samples_log2, &device->meta_state.fmask_copy.pipeline[samples_log2]);
}
VkResult
assert(src_image->planes[0].surface.cmask_size == dst_image->planes[0].surface.cmask_size &&
src_image->planes[0].surface.fmask_size == dst_image->planes[0].surface.fmask_size);
assert(src_image->planes[0].surface.fmask_offset + src_image->planes[0].surface.fmask_size ==
- src_image->planes[0].surface.cmask_offset &&
+ src_image->planes[0].surface.cmask_offset &&
dst_image->planes[0].surface.fmask_offset + dst_image->planes[0].surface.fmask_size ==
- dst_image->planes[0].surface.cmask_offset);
+ dst_image->planes[0].surface.cmask_offset);
/* Copy CMASK+FMASK. */
size = src_image->planes[0].surface.cmask_size + src_image->planes[0].surface.fmask_size;
src_offset = src_image->bindings[0].offset + src_image->planes[0].surface.fmask_offset;
dst_offset = dst_image->bindings[0].offset + dst_image->planes[0].surface.fmask_offset;
- radv_copy_buffer(cmd_buffer, src_image->bindings[0].bo, dst_image->bindings[0].bo,
- src_offset, dst_offset, size);
+ radv_copy_buffer(cmd_buffer, src_image->bindings[0].bo, dst_image->bindings[0].bo, src_offset, dst_offset, size);
}
bool
-radv_can_use_fmask_copy(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image *src_image, const struct radv_image *dst_image,
- unsigned num_rects, const struct radv_meta_blit2d_rect *rects)
+radv_can_use_fmask_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image,
+ const struct radv_image *dst_image, unsigned num_rects,
+ const struct radv_meta_blit2d_rect *rects)
{
/* TODO: Test on pre-GFX10 chips. */
if (cmd_buffer->device->physical_device->rad_info.gfx_level < GFX10)
return false;
/* Source/destination images must have identical swizzle. */
- if (src_image->planes[0].surface.fmask_tile_swizzle !=
- dst_image->planes[0].surface.fmask_tile_swizzle ||
+ if (src_image->planes[0].surface.fmask_tile_swizzle != dst_image->planes[0].surface.fmask_tile_swizzle ||
src_image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode !=
- dst_image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode)
+ dst_image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode)
return false;
return true;
},
0, NULL);
- radv_meta_push_descriptor_set(
- cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
- cmd_buffer->device->meta_state.fmask_copy.p_layout, 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo =
- (VkDescriptorImageInfo[]){
- {.sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&src_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
- }},
- {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]){
- {.sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&dst_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
- }}});
+ radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+ cmd_buffer->device->meta_state.fmask_copy.p_layout, 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {.sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(&src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
+ }},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {.sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(&dst_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
+ }}});
radv_unaligned_dispatch(cmd_buffer, src->image->vk.extent.width, src->image->vk.extent.height, 1);
#include "radv_private.h"
#include "vk_format.h"
-static VkResult radv_device_init_meta_fmask_expand_state_internal(struct radv_device *device,
- uint32_t samples_log2);
+static VkResult radv_device_init_meta_fmask_expand_state_internal(struct radv_device *device, uint32_t samples_log2);
static nir_shader *
build_fmask_expand_compute_shader(struct radv_device *device, int samples)
{
- const struct glsl_type *type =
- glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, GLSL_TYPE_FLOAT);
+ const struct glsl_type *type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, GLSL_TYPE_FLOAT);
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_MS, true, GLSL_TYPE_FLOAT);
- nir_builder b =
- radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "meta_fmask_expand_cs-%d", samples);
+ nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "meta_fmask_expand_cs-%d", samples);
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
tex_vals[i] = nir_txf_ms_deref(&b, input_img_deref, tex_coord, nir_imm_int(&b, i));
}
- nir_ssa_def *img_coord =
- nir_vec4(&b, nir_channel(&b, tex_coord, 0), nir_channel(&b, tex_coord, 1),
- nir_channel(&b, tex_coord, 2), nir_ssa_undef(&b, 1, 32));
+ nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, tex_coord, 0), nir_channel(&b, tex_coord, 1),
+ nir_channel(&b, tex_coord, 2), nir_ssa_undef(&b, 1, 32));
for (uint32_t i = 0; i < samples; i++) {
- nir_image_deref_store(&b, output_img_deref, img_coord, nir_imm_int(&b, i), tex_vals[i],
- nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_MS, .image_array = true);
+ nir_image_deref_store(&b, output_img_deref, img_coord, nir_imm_int(&b, i), tex_vals[i], nir_imm_int(&b, 0),
+ .image_dim = GLSL_SAMPLER_DIM_MS, .image_array = true);
}
return b.shader;
return;
}
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);
+ radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);
VkPipeline pipeline = device->meta_state.fmask_expand.pipeline[samples_log2];
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
- pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
- cmd_buffer->state.flush_bits |= radv_dst_access_flush(
- cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT, image);
+ cmd_buffer->state.flush_bits |=
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT, image);
radv_image_view_init(&iview, device,
&(VkImageViewCreateInfo){
},
0, NULL);
- radv_meta_push_descriptor_set(
- cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
- cmd_buffer->device->meta_state.fmask_expand.p_layout, 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo =
- (VkDescriptorImageInfo[]){
- {.sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
- }},
- {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]){
- {.sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
- }}});
+ radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+ cmd_buffer->device->meta_state.fmask_expand.p_layout, 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {.sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(&iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
+ }},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {.sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(&iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
+ }}});
radv_unaligned_dispatch(cmd_buffer, image->vk.extent.width, image->vk.extent.height, layer_count);
radv_meta_restore(&saved_state, cmd_buffer);
cmd_buffer->state.flush_bits |=
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
/* Re-initialize FMASK in fully expanded mode. */
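/* ("Fully expanded" here means the expand shader has just written every
 * sample's color explicitly, so FMASK can be reset to the identity encoding
 * in which each sample simply points at its own fragment.) */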
cmd_buffer->state.flush_bits |= radv_init_fmask(cmd_buffer, image, subresourceRange);
struct radv_meta_state *state = &device->meta_state;
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
- radv_DestroyPipeline(radv_device_to_handle(device), state->fmask_expand.pipeline[i],
- &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->fmask_expand.pipeline[i], &state->alloc);
}
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fmask_expand.p_layout,
- &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fmask_expand.p_layout, &state->alloc);
- device->vk.dispatch_table.DestroyDescriptorSetLayout(
- radv_device_to_handle(device), state->fmask_expand.ds_layout, &state->alloc);
+ device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->fmask_expand.ds_layout,
+ &state->alloc);
}
static VkResult
.layout = state->fmask_expand.p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), state->cache,
- &vk_pipeline_info, NULL, pipeline);
+ result =
+ radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, &vk_pipeline_info, NULL, pipeline);
ralloc_free(cs);
return result;
.pImmutableSamplers = NULL},
}};
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
- &state->alloc, &state->fmask_expand.ds_layout);
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &state->alloc,
+ &state->fmask_expand.ds_layout);
if (result != VK_SUCCESS)
return result;
}
.pPushConstantRanges = NULL,
};
- result = radv_CreatePipelineLayout(radv_device_to_handle(device), &color_create_info,
- &state->alloc, &state->fmask_expand.p_layout);
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &color_create_info, &state->alloc,
+ &state->fmask_expand.p_layout);
if (result != VK_SUCCESS)
return result;
}
- result = create_fmask_expand_pipeline(device, 1 << samples_log2,
- &state->fmask_expand.pipeline[samples_log2]);
+ result = create_fmask_expand_pipeline(device, 1 << samples_log2, &state->fmask_expand.pipeline[samples_log2]);
return result;
}
}
static VkResult
-create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkFormat format,
- VkPipeline *pipeline)
+create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkFormat format, VkPipeline *pipeline)
{
VkResult result;
VkDevice device_h = radv_device_to_handle(device);
};
if (!device->meta_state.resolve.p_layout) {
- result =
- radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
- &device->meta_state.alloc, &device->meta_state.resolve.p_layout);
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
+ &device->meta_state.resolve.p_layout);
if (result != VK_SUCCESS)
goto cleanup;
}
- VkFormat color_formats[2] = { format, format };
+ VkFormat color_formats[2] = {format, format};
const VkPipelineRenderingCreateInfo rendering_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
.colorAttachmentCount = 2,
struct radv_meta_state *state = &device->meta_state;
for (uint32_t j = 0; j < NUM_META_FS_KEYS; j++) {
- radv_DestroyPipeline(radv_device_to_handle(device), state->resolve.pipeline[j],
- &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve.pipeline[j], &state->alloc);
}
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve.p_layout,
- &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve.p_layout, &state->alloc);
}
VkResult
}
static void
-emit_resolve(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image,
- const struct radv_image *dst_image, VkFormat vk_format)
+emit_resolve(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image, const struct radv_image *dst_image,
+ VkFormat vk_format)
{
struct radv_device *device = cmd_buffer->device;
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT, src_image) |
radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, dst_image);
- radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
- device->meta_state.resolve.pipeline[fs_key]);
+ radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, device->meta_state.resolve.pipeline[fs_key]);
radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
- cmd_buffer->state.flush_bits |=
- radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, dst_image);
+ cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, dst_image);
}
enum radv_resolve_method {
};
static bool
-image_hw_resolve_compat(const struct radv_device *device, struct radv_image *src_image,
- struct radv_image *dst_image)
+image_hw_resolve_compat(const struct radv_device *device, struct radv_image *src_image, struct radv_image *dst_image)
{
if (device->physical_device->rad_info.gfx_level >= GFX9) {
- return dst_image->planes[0].surface.u.gfx9.swizzle_mode ==
- src_image->planes[0].surface.u.gfx9.swizzle_mode;
+ return dst_image->planes[0].surface.u.gfx9.swizzle_mode == src_image->planes[0].surface.u.gfx9.swizzle_mode;
} else {
- return dst_image->planes[0].surface.micro_tile_mode ==
- src_image->planes[0].surface.micro_tile_mode;
+ return dst_image->planes[0].surface.micro_tile_mode == src_image->planes[0].surface.micro_tile_mode;
}
}
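/* The fixed-function resolve addresses source and destination with the same
 * tiling equations, so the layouts must match: swizzle mode on GFX9+, micro
 * tile mode on earlier chips. Mismatched images take the fragment or compute
 * paths chosen below. */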
static void
-radv_pick_resolve_method_images(struct radv_device *device, struct radv_image *src_image,
- VkFormat src_format, struct radv_image *dst_image,
- unsigned dst_level, VkImageLayout dst_image_layout,
- struct radv_cmd_buffer *cmd_buffer,
- enum radv_resolve_method *method)
+radv_pick_resolve_method_images(struct radv_device *device, struct radv_image *src_image, VkFormat src_format,
+ struct radv_image *dst_image, unsigned dst_level, VkImageLayout dst_image_layout,
+ struct radv_cmd_buffer *cmd_buffer, enum radv_resolve_method *method)
{
- uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf,
- cmd_buffer->qf);
+ uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf);
if (vk_format_is_color(src_format)) {
/* Using the fragment resolve path is currently a hint to
* avoid decompressing DCC for partial resolves and
* re-initialize it after resolving using compute.
* TODO: Add support for layered and int to the fragment path.
*/
- if (radv_layout_dcc_compressed(device, dst_image, dst_level, dst_image_layout,
- queue_mask)) {
+ if (radv_layout_dcc_compressed(device, dst_image, dst_level, dst_image_layout, queue_mask)) {
*method = RESOLVE_FRAGMENT;
} else if (!image_hw_resolve_compat(device, src_image, dst_image)) {
/* The micro tile mode only needs to match for the HW
assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);
- const uint32_t src_base_layer =
- radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);
+ const uint32_t src_base_layer = radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);
- const uint32_t dst_base_layer =
- radv_meta_get_iview_layer(dst_image, &region->dstSubresource, &region->dstOffset);
+ const uint32_t dst_base_layer = radv_meta_get_iview_layer(dst_image, &region->dstSubresource, &region->dstOffset);
/**
* From Vulkan 1.0.6 spec: 18.6 Resolving Multisample Images
const struct VkExtent3D extent = vk_image_sanitize_extent(&src_image->vk, region->extent);
const struct VkOffset3D dstOffset = vk_image_sanitize_offset(&dst_image->vk, region->dstOffset);
- uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf,
- cmd_buffer->qf);
+ uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf);
- if (radv_layout_dcc_compressed(cmd_buffer->device, dst_image, region->dstSubresource.mipLevel,
- dst_image_layout, queue_mask)) {
+ if (radv_layout_dcc_compressed(cmd_buffer->device, dst_image, region->dstSubresource.mipLevel, dst_image_layout,
+ queue_mask)) {
VkImageSubresourceRange range = {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = region->dstSubresource.mipLevel,
}
VkRect2D resolve_area = {
- .offset = { dstOffset.x, dstOffset.y },
- .extent = { extent.width, extent.height },
+ .offset = {dstOffset.x, dstOffset.y},
+ .extent = {extent.width, extent.height},
};
radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
}
static void
-resolve_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
- VkImageLayout src_image_layout, struct radv_image *dst_image,
- VkImageLayout dst_image_layout, const VkImageResolve2 *region,
+resolve_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkImageLayout src_image_layout,
+ struct radv_image *dst_image, VkImageLayout dst_image_layout, const VkImageResolve2 *region,
enum radv_resolve_method resolve_method)
{
switch (resolve_method) {
case RESOLVE_HW:
- radv_meta_resolve_hardware_image(cmd_buffer, src_image, src_image_layout, dst_image,
- dst_image_layout, region);
+ radv_meta_resolve_hardware_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout, region);
break;
case RESOLVE_FRAGMENT:
radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout, region);
- radv_meta_resolve_fragment_image(cmd_buffer, src_image, src_image_layout, dst_image,
- dst_image_layout, region);
+ radv_meta_resolve_fragment_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout, region);
break;
case RESOLVE_COMPUTE:
radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout, region);
- radv_meta_resolve_compute_image(cmd_buffer, src_image, src_image->vk.format, src_image_layout,
- dst_image, dst_image->vk.format, dst_image_layout, region);
+ radv_meta_resolve_compute_image(cmd_buffer, src_image, src_image->vk.format, src_image_layout, dst_image,
+ dst_image->vk.format, dst_image_layout, region);
break;
default:
assert(!"Invalid resolve method selected");
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdResolveImage2(VkCommandBuffer commandBuffer,
- const VkResolveImageInfo2 *pResolveImageInfo)
+radv_CmdResolveImage2(VkCommandBuffer commandBuffer, const VkResolveImageInfo2 *pResolveImageInfo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_image, src_image, pResolveImageInfo->srcImage);
VkImageLayout src_image_layout = pResolveImageInfo->srcImageLayout;
VkImageLayout dst_image_layout = pResolveImageInfo->dstImageLayout;
const struct radv_physical_device *pdevice = cmd_buffer->device->physical_device;
- enum radv_resolve_method resolve_method =
- pdevice->rad_info.gfx_level >= GFX11 ? RESOLVE_FRAGMENT : RESOLVE_HW;
+ enum radv_resolve_method resolve_method = pdevice->rad_info.gfx_level >= GFX11 ? RESOLVE_FRAGMENT : RESOLVE_HW;
/* we can use the hw resolve only for single full resolves */
if (pResolveImageInfo->regionCount == 1) {
- if (pResolveImageInfo->pRegions[0].srcOffset.x ||
- pResolveImageInfo->pRegions[0].srcOffset.y || pResolveImageInfo->pRegions[0].srcOffset.z)
+ if (pResolveImageInfo->pRegions[0].srcOffset.x || pResolveImageInfo->pRegions[0].srcOffset.y ||
+ pResolveImageInfo->pRegions[0].srcOffset.z)
resolve_method = RESOLVE_COMPUTE;
- if (pResolveImageInfo->pRegions[0].dstOffset.x ||
- pResolveImageInfo->pRegions[0].dstOffset.y || pResolveImageInfo->pRegions[0].dstOffset.z)
+ if (pResolveImageInfo->pRegions[0].dstOffset.x || pResolveImageInfo->pRegions[0].dstOffset.y ||
+ pResolveImageInfo->pRegions[0].dstOffset.z)
resolve_method = RESOLVE_COMPUTE;
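/* Any non-zero offset implies a partial resolve, which the HW path cannot
 * express; the extent checks below likewise fall back to compute for
 * anything smaller than the full image. */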
if (pResolveImageInfo->pRegions[0].extent.width != src_image->vk.extent.width ||
const VkImageResolve2 *region = &pResolveImageInfo->pRegions[r];
radv_pick_resolve_method_images(cmd_buffer->device, src_image, src_image->vk.format, dst_image,
- region->dstSubresource.mipLevel, dst_image_layout,
- cmd_buffer, &resolve_method);
+ region->dstSubresource.mipLevel, dst_image_layout, cmd_buffer, &resolve_method);
- resolve_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout, region,
- resolve_method);
+ resolve_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout, region, resolve_method);
}
}
{
struct radv_meta_saved_state saved_state;
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_RENDER);
+ radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_RENDER);
VkRect2D *resolve_area = &saved_state.render.area;
VkImageLayout dst_layout = saved_state.render.color_att[i].resolve_layout;
struct radv_image *dst_img = dst_iview->image;
- uint32_t queue_mask = radv_image_queue_family_mask(dst_img, cmd_buffer->qf,
- cmd_buffer->qf);
+ uint32_t queue_mask = radv_image_queue_family_mask(dst_img, cmd_buffer->qf, cmd_buffer->qf);
- if (radv_layout_dcc_compressed(cmd_buffer->device, dst_img, dst_iview->vk.base_mip_level,
- dst_layout, queue_mask)) {
+ if (radv_layout_dcc_compressed(cmd_buffer->device, dst_img, dst_iview->vk.base_mip_level, dst_layout,
+ queue_mask)) {
VkImageSubresourceRange range = {
.aspectMask = dst_iview->vk.aspects,
.baseMipLevel = dst_iview->vk.base_mip_level,
radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info);
- VkResult ret = build_resolve_pipeline(
- cmd_buffer->device, radv_format_meta_fs_key(cmd_buffer->device, dst_iview->vk.format));
+ VkResult ret =
+ build_resolve_pipeline(cmd_buffer->device, radv_format_meta_fs_key(cmd_buffer->device, dst_iview->vk.format));
if (ret != VK_SUCCESS) {
vk_command_buffer_set_error(&cmd_buffer->vk, ret);
continue;
{
const struct radv_physical_device *pdevice = cmd_buffer->device->physical_device;
const struct radv_rendering_state *render = &cmd_buffer->state.render;
- enum radv_resolve_method resolve_method =
- pdevice->rad_info.gfx_level >= GFX11 ? RESOLVE_FRAGMENT : RESOLVE_HW;
+ enum radv_resolve_method resolve_method = pdevice->rad_info.gfx_level >= GFX11 ? RESOLVE_FRAGMENT : RESOLVE_HW;
bool has_color_resolve = false;
for (uint32_t i = 0; i < render->color_att_count; ++i) {
struct radv_image_view *dst_iview = render->ds_att.resolve_iview;
VkImageLayout dst_layout = render->ds_att.resolve_layout;
- radv_pick_resolve_method_images(cmd_buffer->device, src_iview->image, src_iview->vk.format,
- dst_iview->image, dst_iview->vk.base_mip_level, dst_layout,
- cmd_buffer, &resolve_method);
+ radv_pick_resolve_method_images(cmd_buffer->device, src_iview->image, src_iview->vk.format, dst_iview->image,
+ dst_iview->vk.base_mip_level, dst_layout, cmd_buffer, &resolve_method);
- if ((src_iview->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
- render->ds_att.resolve_mode != VK_RESOLVE_MODE_NONE) {
+ if ((src_iview->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && render->ds_att.resolve_mode != VK_RESOLVE_MODE_NONE) {
if (resolve_method == RESOLVE_FRAGMENT) {
- radv_depth_stencil_resolve_rendering_fs(cmd_buffer, VK_IMAGE_ASPECT_DEPTH_BIT,
- render->ds_att.resolve_mode);
+ radv_depth_stencil_resolve_rendering_fs(cmd_buffer, VK_IMAGE_ASPECT_DEPTH_BIT, render->ds_att.resolve_mode);
} else {
assert(resolve_method == RESOLVE_COMPUTE);
- radv_depth_stencil_resolve_rendering_cs(cmd_buffer, VK_IMAGE_ASPECT_DEPTH_BIT,
- render->ds_att.resolve_mode);
+ radv_depth_stencil_resolve_rendering_cs(cmd_buffer, VK_IMAGE_ASPECT_DEPTH_BIT, render->ds_att.resolve_mode);
}
}
struct radv_image *dst_img = dst_iview->image;
radv_pick_resolve_method_images(cmd_buffer->device, src_img, src_iview->vk.format, dst_img,
- dst_iview->vk.base_mip_level, dst_layout,
- cmd_buffer, &resolve_method);
+ dst_iview->vk.base_mip_level, dst_layout, cmd_buffer, &resolve_method);
VkImageResolve2 region = {
.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2,
- .extent = {
- .width = resolve_area.extent.width,
- .height = resolve_area.extent.height,
- .depth = 1,
- },
+ .extent =
+ {
+ .width = resolve_area.extent.width,
+ .height = resolve_area.extent.height,
+ .depth = 1,
+ },
.srcSubresource =
(VkImageSubresourceLayers){
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseArrayLayer = dst_iview->vk.base_array_layer,
.layerCount = layer_count,
},
- .srcOffset = { resolve_area.offset.x, resolve_area.offset.y, 0 },
- .dstOffset = { resolve_area.offset.x, resolve_area.offset.y, 0 },
+ .srcOffset = {resolve_area.offset.x, resolve_area.offset.y, 0},
+ .dstOffset = {resolve_area.offset.x, resolve_area.offset.y, 0},
};
switch (resolve_method) {
case RESOLVE_COMPUTE:
radv_decompress_resolve_src(cmd_buffer, src_iview->image, src_layout, &region);
- radv_cmd_buffer_resolve_rendering_cs(cmd_buffer, src_iview, src_layout, dst_iview,
- dst_layout, &region);
+ radv_cmd_buffer_resolve_rendering_cs(cmd_buffer, src_iview, src_layout, dst_iview, dst_layout, &region);
break;
case RESOLVE_FRAGMENT:
radv_decompress_resolve_src(cmd_buffer, src_iview->image, src_layout, &region);
- radv_cmd_buffer_resolve_rendering_fs(cmd_buffer, src_iview, src_layout, dst_iview,
- dst_layout);
+ radv_cmd_buffer_resolve_rendering_fs(cmd_buffer, src_iview, src_layout, dst_iview, dst_layout);
break;
default:
unreachable("Invalid resolve method");
radv_decompress_resolve_src(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
VkImageLayout src_image_layout, const VkImageResolve2 *region)
{
- const uint32_t src_base_layer =
- radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);
+ const uint32_t src_base_layer = radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);
VkImageMemoryBarrier2 barrier = {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
.oldLayout = src_image_layout,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.image = radv_image_to_handle(src_image),
- .subresourceRange = (VkImageSubresourceRange){
- .aspectMask = region->srcSubresource.aspectMask,
- .baseMipLevel = region->srcSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = src_base_layer,
- .layerCount = region->srcSubresource.layerCount,
- },
+ .subresourceRange =
+ (VkImageSubresourceRange){
+ .aspectMask = region->srcSubresource.aspectMask,
+ .baseMipLevel = region->srcSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = src_base_layer,
+ .layerCount = region->srcSubresource.layerCount,
+ },
};
VkSampleLocationsInfoEXT sample_loc_info;
/* If the depth/stencil image uses different sample
* locations, we need them during HTILE decompressions.
*/
- struct radv_sample_locations_state *sample_locs =
- &cmd_buffer->state.render.sample_locations;
+ struct radv_sample_locations_state *sample_locs = &cmd_buffer->state.render.sample_locations;
sample_loc_info = (VkSampleLocationsInfoEXT){
.sType = VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT,
build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
{
enum glsl_base_type img_base_type = is_integer ? GLSL_TYPE_UINT : GLSL_TYPE_FLOAT;
- const struct glsl_type *sampler_type =
- glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type);
+ const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type);
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, img_base_type);
nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs-%d-%s", samples,
is_integer ? "int" : (is_srgb ? "srgb" : "float"));
if (is_srgb)
outval = radv_meta_build_resolve_srgb_conversion(&b, outval);
- nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0),
- nir_channel(&b, dst_coord, 1),
- nir_ssa_undef(&b, 1, 32),
- nir_ssa_undef(&b, 1, 32));
+ nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), nir_channel(&b, dst_coord, 1),
+ nir_ssa_undef(&b, 1, 32), nir_ssa_undef(&b, 1, 32));
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord,
- nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
- .image_dim = GLSL_SAMPLER_DIM_2D);
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32),
+ outval, nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
return b.shader;
}
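/* Roughly what the generated NIR computes, sketched as GLSL for orientation
 * only (illustrative names; `linear_to_srgb` stands in for
 * radv_meta_build_resolve_srgb_conversion, and integer formats take sample 0
 * instead of an average):
 *
 *   vec4 sum = vec4(0.0);
 *   for (int s = 0; s < SAMPLES; ++s)
 *      sum += texelFetch(s_tex, src_coord, s);
 *   vec4 outval = sum / float(SAMPLES);
 *   if (IS_SRGB)
 *      outval = linear_to_srgb(outval);
 *   imageStore(out_img, dst_coord, outval);
 */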
VkResolveModeFlagBits resolve_mode)
{
enum glsl_base_type img_base_type = index == DEPTH_RESOLVE ? GLSL_TYPE_FLOAT : GLSL_TYPE_UINT;
- const struct glsl_type *sampler_type =
- glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, img_base_type);
+ const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, img_base_type);
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, img_base_type);
- nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs_%s-%s-%d",
- index == DEPTH_RESOLVE ? "depth" : "stencil",
- get_resolve_mode_str(resolve_mode), samples);
+ nir_builder b =
+ radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs_%s-%s-%d",
+ index == DEPTH_RESOLVE ? "depth" : "stencil", get_resolve_mode_str(resolve_mode), samples);
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
nir_ssa_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
- nir_ssa_def *resolve_coord = nir_iadd(&b,
- nir_trim_vector(&b, global_id, 2),
- offset);
+ nir_ssa_def *resolve_coord = nir_iadd(&b, nir_trim_vector(&b, global_id, 2), offset);
- nir_ssa_def *img_coord = nir_vec3(&b, nir_channel(&b, resolve_coord, 0),
- nir_channel(&b, resolve_coord, 1),
- nir_channel(&b, global_id, 2));
+ nir_ssa_def *img_coord =
+ nir_vec3(&b, nir_channel(&b, resolve_coord, 0), nir_channel(&b, resolve_coord, 1), nir_channel(&b, global_id, 2));
nir_deref_instr *input_img_deref = nir_build_deref_var(&b, input_img);
nir_ssa_def *outval = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, 0));
nir_ssa_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0), nir_channel(&b, img_coord, 1),
nir_channel(&b, img_coord, 2), nir_ssa_undef(&b, 1, 32));
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
- nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
- .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_ssa_undef(&b, 1, 32), outval,
+ nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
return b.shader;
}
* two descriptors: one for the image being sampled,
* one for the storage image being written.
*/
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]){
- {.binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- {.binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- }};
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
- &device->meta_state.alloc,
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
&device->meta_state.resolve_compute.ds_layout);
if (result != VK_SUCCESS)
goto fail;
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
};
- result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
- &device->meta_state.alloc,
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
&device->meta_state.resolve_compute.p_layout);
if (result != VK_SUCCESS)
goto fail;
}
static VkResult
-create_resolve_pipeline(struct radv_device *device, int samples, bool is_integer, bool is_srgb,
- VkPipeline *pipeline)
+create_resolve_pipeline(struct radv_device *device, int samples, bool is_integer, bool is_srgb, VkPipeline *pipeline)
{
VkResult result;
.layout = device->meta_state.resolve_compute.p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &vk_pipeline_info, NULL, pipeline);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
+ NULL, pipeline);
if (result != VK_SUCCESS)
goto fail;
return VK_SUCCESS;
}
- nir_shader *cs =
- build_depth_stencil_resolve_compute_shader(device, samples, index, resolve_mode);
+ nir_shader *cs = build_depth_stencil_resolve_compute_shader(device, samples, index, resolve_mode);
/* compute shader */
VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
.layout = device->meta_state.resolve_compute.p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &vk_pipeline_info, NULL, pipeline);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
+ NULL, pipeline);
if (result != VK_SUCCESS)
goto fail;
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
uint32_t samples = 1 << i;
- res = create_resolve_pipeline(device, samples, false, false,
- &state->resolve_compute.rc[i].pipeline);
+ res = create_resolve_pipeline(device, samples, false, false, &state->resolve_compute.rc[i].pipeline);
if (res != VK_SUCCESS)
return res;
- res = create_resolve_pipeline(device, samples, true, false,
- &state->resolve_compute.rc[i].i_pipeline);
+ res = create_resolve_pipeline(device, samples, true, false, &state->resolve_compute.rc[i].i_pipeline);
if (res != VK_SUCCESS)
return res;
- res = create_resolve_pipeline(device, samples, false, true,
- &state->resolve_compute.rc[i].srgb_pipeline);
+ res = create_resolve_pipeline(device, samples, false, true, &state->resolve_compute.rc[i].srgb_pipeline);
if (res != VK_SUCCESS)
return res;
- res = create_depth_stencil_resolve_pipeline(
- device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_AVERAGE_BIT,
- &state->resolve_compute.depth[i].average_pipeline);
+ res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_AVERAGE_BIT,
+ &state->resolve_compute.depth[i].average_pipeline);
if (res != VK_SUCCESS)
return res;
- res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE,
- VK_RESOLVE_MODE_MAX_BIT,
+ res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_MAX_BIT,
&state->resolve_compute.depth[i].max_pipeline);
if (res != VK_SUCCESS)
return res;
- res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE,
- VK_RESOLVE_MODE_MIN_BIT,
+ res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_MIN_BIT,
&state->resolve_compute.depth[i].min_pipeline);
if (res != VK_SUCCESS)
return res;
- res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE,
- VK_RESOLVE_MODE_MAX_BIT,
+ res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE, VK_RESOLVE_MODE_MAX_BIT,
&state->resolve_compute.stencil[i].max_pipeline);
if (res != VK_SUCCESS)
return res;
- res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE,
- VK_RESOLVE_MODE_MIN_BIT,
+ res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE, VK_RESOLVE_MODE_MIN_BIT,
&state->resolve_compute.stencil[i].min_pipeline);
if (res != VK_SUCCESS)
return res;
}
- res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE,
- VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
+ res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
&state->resolve_compute.depth_zero_pipeline);
if (res != VK_SUCCESS)
return res;
- return create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE,
- VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
+ return create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
&state->resolve_compute.stencil_zero_pipeline);
}
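/* A minimal sketch of how the pipelines built above are selected later
 * (hypothetical helper for illustration; the driver's real lookup is
 * radv_get_resolve_pipeline): index by log2(sample count), then by the
 * format class of the source image. */
static inline VkPipeline *
pick_color_resolve_pipeline(struct radv_meta_state *state, uint32_t samples,
                            bool is_integer, bool is_srgb)
{
   const uint32_t i = ffs(samples) - 1; /* samples_log2, as elsewhere in this file */
   if (is_integer)
      return &state->resolve_compute.rc[i].i_pipeline;
   if (is_srgb)
      return &state->resolve_compute.rc[i].srgb_pipeline;
   return &state->resolve_compute.rc[i].pipeline;
}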
{
struct radv_meta_state *state = &device->meta_state;
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
- radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].pipeline,
- &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].i_pipeline,
- &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].i_pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.rc[i].srgb_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].srgb_pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.depth[i].average_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth[i].average_pipeline,
+ &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.depth[i].max_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth[i].max_pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.depth[i].min_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth[i].min_pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.stencil[i].max_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil[i].max_pipeline,
+ &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.stencil[i].min_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil[i].min_pipeline,
+ &state->alloc);
}
- radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth_zero_pipeline,
- &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth_zero_pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil_zero_pipeline,
- &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil_zero_pipeline, &state->alloc);
- device->vk.dispatch_table.DestroyDescriptorSetLayout(
- radv_device_to_handle(device), state->resolve_compute.ds_layout, &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_compute.p_layout,
- &state->alloc);
+ device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->resolve_compute.ds_layout,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_compute.p_layout, &state->alloc);
}
static VkPipeline *
}
static void
-emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
- struct radv_image_view *dst_iview, const VkOffset2D *src_offset,
- const VkOffset2D *dst_offset, const VkExtent2D *resolve_extent)
+emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview, struct radv_image_view *dst_iview,
+ const VkOffset2D *src_offset, const VkOffset2D *dst_offset, const VkExtent2D *resolve_extent)
{
struct radv_device *device = cmd_buffer->device;
VkPipeline *pipeline;
- radv_meta_push_descriptor_set(
- cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo =
- (VkDescriptorImageInfo[]){
- {.sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(src_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
- }},
- {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]){
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(dst_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }}});
+ radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.resolve_compute.p_layout, 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {.sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
+ }},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(dst_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview);
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
- *pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
unsigned push_constants[4] = {
src_offset->x,
src_offset->y,
dst_offset->x,
dst_offset->y,
};
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.resolve_compute.p_layout, VK_SHADER_STAGE_COMPUTE_BIT,
- 0, 16, push_constants);
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.resolve_compute.p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
}
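/* The four push constants written above fill the 16-byte VkPushConstantRange
 * declared for this layout: src_offset.xy followed by dst_offset.xy, which
 * the resolve shader reads to translate between source and destination texel
 * coordinates. */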
const uint32_t samples_log2 = ffs(samples) - 1;
VkPipeline *pipeline;
- radv_meta_push_descriptor_set(
- cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo =
- (VkDescriptorImageInfo[]){
- {.sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(src_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
- }},
- {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]){
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(dst_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }}});
+ radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.resolve_compute.p_layout, 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {.sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
+ }},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(dst_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
switch (resolve_mode) {
case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT:
}
}
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
- *pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
- uint32_t push_constants[2] = { resolve_offset->x, resolve_offset->y };
+ uint32_t push_constants[2] = {resolve_offset->x, resolve_offset->y};
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.resolve_compute.p_layout, VK_SHADER_STAGE_COMPUTE_BIT,
- 0, sizeof(push_constants), push_constants);
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.resolve_compute.p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), push_constants);
- radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height,
- resolve_extent->depth);
+ radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, resolve_extent->depth);
}
void
-radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
- VkFormat src_format, VkImageLayout src_image_layout,
- struct radv_image *dst_image, VkFormat dst_format,
+radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkFormat src_format,
+ VkImageLayout src_image_layout, struct radv_image *dst_image, VkFormat dst_format,
VkImageLayout dst_image_layout, const VkImageResolve2 *region)
{
struct radv_meta_saved_state saved_state;
/* For partial resolves, DCC should be decompressed before resolving
* because the metadata is re-initialized to the uncompressed state after.
*/
- uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf,
- cmd_buffer->qf);
+ uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf);
if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dst_image) &&
- radv_layout_dcc_compressed(cmd_buffer->device, dst_image, region->dstSubresource.mipLevel,
- dst_image_layout, queue_mask) &&
+ radv_layout_dcc_compressed(cmd_buffer->device, dst_image, region->dstSubresource.mipLevel, dst_image_layout,
+ queue_mask) &&
(region->dstOffset.x || region->dstOffset.y || region->dstOffset.z ||
- region->extent.width != dst_image->vk.extent.width ||
- region->extent.height != dst_image->vk.extent.height ||
+ region->extent.width != dst_image->vk.extent.width || region->extent.height != dst_image->vk.extent.height ||
region->extent.depth != dst_image->vk.extent.depth)) {
radv_decompress_dcc(cmd_buffer, dst_image,
&(VkImageSubresourceRange){
});
}
- radv_meta_save(
- &saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);
- const uint32_t src_base_layer =
- radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);
+ const uint32_t src_base_layer = radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);
- const uint32_t dst_base_layer =
- radv_meta_get_iview_layer(dst_image, &region->dstSubresource, &region->dstOffset);
+ const uint32_t dst_base_layer = radv_meta_get_iview_layer(dst_image, &region->dstSubresource, &region->dstOffset);
const struct VkExtent3D extent = vk_image_sanitize_extent(&src_image->vk, region->extent);
const struct VkOffset3D srcOffset = vk_image_sanitize_offset(&src_image->vk, region->srcOffset);
0, NULL);
emit_resolve(cmd_buffer, &src_iview, &dst_iview, &(VkOffset2D){srcOffset.x, srcOffset.y},
- &(VkOffset2D){dstOffset.x, dstOffset.y},
- &(VkExtent2D){extent.width, extent.height});
+ &(VkOffset2D){dstOffset.x, dstOffset.y}, &(VkExtent2D){extent.width, extent.height});
radv_image_view_finish(&src_iview);
radv_image_view_finish(&dst_iview);
radv_meta_restore(&saved_state, cmd_buffer);
if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dst_image) &&
- radv_layout_dcc_compressed(cmd_buffer->device, dst_image, region->dstSubresource.mipLevel,
- dst_image_layout, queue_mask)) {
+ radv_layout_dcc_compressed(cmd_buffer->device, dst_image, region->dstSubresource.mipLevel, dst_image_layout,
+ queue_mask)) {
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE;
}
void
-radv_cmd_buffer_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *src_iview, VkImageLayout src_layout,
- struct radv_image_view *dst_iview, VkImageLayout dst_layout,
- const VkImageResolve2 *region)
+radv_cmd_buffer_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
+ VkImageLayout src_layout, struct radv_image_view *dst_iview,
+ VkImageLayout dst_layout, const VkImageResolve2 *region)
{
- radv_meta_resolve_compute_image(cmd_buffer,
- src_iview->image, src_iview->vk.format, src_layout,
- dst_iview->image, dst_iview->vk.format, dst_layout,
- region);
+ radv_meta_resolve_compute_image(cmd_buffer, src_iview->image, src_iview->vk.format, src_layout, dst_iview->image,
+ dst_iview->vk.format, dst_layout, region);
- cmd_buffer->state.flush_bits |=
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
}
void
-radv_depth_stencil_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer,
- VkImageAspectFlags aspects,
+radv_depth_stencil_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlags aspects,
VkResolveModeFlagBits resolve_mode)
{
const struct radv_rendering_state *render = &cmd_buffer->state.render;
radv_decompress_resolve_src(cmd_buffer, src_image, src_layout, &region);
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);
+ radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);
struct radv_image_view *dst_iview = render->ds_att.resolve_iview;
VkImageLayout dst_layout = render->ds_att.resolve_layout;
&(VkExtent3D){resolve_area.extent.width, resolve_area.extent.height, layer_count},
aspects, resolve_mode);
- cmd_buffer->state.flush_bits |=
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
- uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf,
- cmd_buffer->qf);
+ uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf);
if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, dst_layout, queue_mask)) {
VkImageSubresourceRange range = {0};
{
enum glsl_base_type img_base_type = is_integer ? GLSL_TYPE_UINT : GLSL_TYPE_FLOAT;
const struct glsl_type *vec4 = glsl_vec4_type();
- const struct glsl_type *sampler_type =
- glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type);
+ const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type);
- nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_resolve_fs-%d-%s",
- samples, is_integer ? "int" : "float");
+ nir_builder b =
+ radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_resolve_fs-%d-%s", samples, is_integer ? "int" : "float");
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
input_img->data.descriptor_set = 0;
nir_ssa_def *pos_int = nir_f2i32(&b, pos_in);
- nir_ssa_def *img_coord = nir_trim_vector(&b,
- nir_iadd(&b, pos_int, src_offset),
- 2);
+ nir_ssa_def *img_coord = nir_trim_vector(&b, nir_iadd(&b, pos_int, src_offset), 2);
nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
radv_meta_build_resolve_shader_core(dev, &b, is_integer, samples, input_img, color, img_coord);
/*
* one descriptor for the image being sampled
*/
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings = (VkDescriptorSetLayoutBinding[]){
- {.binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
- .pImmutableSamplers = NULL},
- }};
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
- &device->meta_state.alloc,
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
&device->meta_state.resolve_fragment.ds_layout);
if (result != VK_SUCCESS)
goto fail;
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 8},
};
- result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
- &device->meta_state.alloc,
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
&device->meta_state.resolve_fragment.p_layout);
if (result != VK_SUCCESS)
goto fail;
.scissorCount = 1,
},
.pRasterizationState =
- &(VkPipelineRasterizationStateCreateInfo){
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
- .depthBiasConstantFactor = 0.0f,
- .depthBiasClamp = 0.0f,
- .depthBiasSlopeFactor = 0.0f,
- .lineWidth = 1.0f},
+ &(VkPipelineRasterizationStateCreateInfo){.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ .depthBiasConstantFactor = 0.0f,
+ .depthBiasClamp = 0.0f,
+ .depthBiasSlopeFactor = 0.0f,
+ .lineWidth = 1.0f},
.pMultisampleState =
&(VkPipelineMultisampleStateCreateInfo){
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.attachmentCount = 1,
.pAttachments =
(VkPipelineColorBlendAttachmentState[]){
- {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT},
+ {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT},
},
- .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f }},
+ .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}},
.pDynamicState =
&(VkPipelineDynamicStateCreateInfo){
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true};
- result = radv_graphics_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &vk_pipeline_info, &radv_pipeline_info,
- &device->meta_state.alloc, pipeline);
+ result = radv_graphics_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
+ &radv_pipeline_info, &device->meta_state.alloc, pipeline);
ralloc_free(vs);
ralloc_free(fs);
{
enum glsl_base_type img_base_type = index == DEPTH_RESOLVE ? GLSL_TYPE_FLOAT : GLSL_TYPE_UINT;
const struct glsl_type *vec4 = glsl_vec4_type();
- const struct glsl_type *sampler_type =
- glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type);
+ const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type);
- nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_resolve_fs_%s-%s-%d",
- index == DEPTH_RESOLVE ? "depth" : "stencil",
- get_resolve_mode_str(resolve_mode), samples);
+ nir_builder b =
+ radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_resolve_fs_%s-%s-%d",
+ index == DEPTH_RESOLVE ? "depth" : "stencil", get_resolve_mode_str(resolve_mode), samples);
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
input_img->data.descriptor_set = 0;
}
uint32_t samples = 1 << samples_log2;
- nir_shader *fs =
- build_depth_stencil_resolve_fragment_shader(device, samples, index, resolve_mode);
+ nir_shader *fs = build_depth_stencil_resolve_fragment_shader(device, samples, index, resolve_mode);
nir_shader *vs = radv_meta_build_nir_vs_generate_vertices(device);
VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
.writeMask = UINT32_MAX,
.reference = 0u,
},
- .back = {
- .failOp = stencil_op,
- .passOp = stencil_op,
- .depthFailOp = stencil_op,
- .compareOp = VK_COMPARE_OP_ALWAYS,
- .compareMask = UINT32_MAX,
- .writeMask = UINT32_MAX,
- .reference = 0u,
- },
+ .back =
+ {
+ .failOp = stencil_op,
+ .passOp = stencil_op,
+ .depthFailOp = stencil_op,
+ .compareOp = VK_COMPARE_OP_ALWAYS,
+ .compareMask = UINT32_MAX,
+ .writeMask = UINT32_MAX,
+ .reference = 0u,
+ },
.minDepthBounds = 0.0f,
.maxDepthBounds = 1.0f};
},
.pDepthStencilState = &depth_stencil_state,
.pRasterizationState =
- &(VkPipelineRasterizationStateCreateInfo){
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
- .depthBiasConstantFactor = 0.0f,
- .depthBiasClamp = 0.0f,
- .depthBiasSlopeFactor = 0.0f,
- .lineWidth = 1.0f},
+ &(VkPipelineRasterizationStateCreateInfo){.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ .depthBiasConstantFactor = 0.0f,
+ .depthBiasClamp = 0.0f,
+ .depthBiasSlopeFactor = 0.0f,
+ .lineWidth = 1.0f},
.pMultisampleState =
&(VkPipelineMultisampleStateCreateInfo){
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.attachmentCount = 0,
.pAttachments =
(VkPipelineColorBlendAttachmentState[]){
- {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT},
+ {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT},
},
- .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f }},
+ .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}},
.pDynamicState =
&(VkPipelineDynamicStateCreateInfo){
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true};
- result = radv_graphics_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &vk_pipeline_info, &radv_pipeline_info,
- &device->meta_state.alloc, pipeline);
+ result = radv_graphics_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
+ &radv_pipeline_info, &device->meta_state.alloc, pipeline);
ralloc_free(vs);
ralloc_free(fs);
return res;
}
- res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE,
- VK_RESOLVE_MODE_AVERAGE_BIT);
+ res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE, VK_RESOLVE_MODE_AVERAGE_BIT);
if (res != VK_SUCCESS)
return res;
- res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE,
- VK_RESOLVE_MODE_MIN_BIT);
+ res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE, VK_RESOLVE_MODE_MIN_BIT);
if (res != VK_SUCCESS)
return res;
- res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE,
- VK_RESOLVE_MODE_MAX_BIT);
+ res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE, VK_RESOLVE_MODE_MAX_BIT);
if (res != VK_SUCCESS)
return res;
- res = create_depth_stencil_resolve_pipeline(device, i, STENCIL_RESOLVE,
- VK_RESOLVE_MODE_MIN_BIT);
+ res = create_depth_stencil_resolve_pipeline(device, i, STENCIL_RESOLVE, VK_RESOLVE_MODE_MIN_BIT);
if (res != VK_SUCCESS)
return res;
- res = create_depth_stencil_resolve_pipeline(device, i, STENCIL_RESOLVE,
- VK_RESOLVE_MODE_MAX_BIT);
+ res = create_depth_stencil_resolve_pipeline(device, i, STENCIL_RESOLVE, VK_RESOLVE_MODE_MAX_BIT);
if (res != VK_SUCCESS)
return res;
}
- res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE,
- VK_RESOLVE_MODE_SAMPLE_ZERO_BIT);
+ res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT);
if (res != VK_SUCCESS)
return res;
- return create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE,
- VK_RESOLVE_MODE_SAMPLE_ZERO_BIT);
+ return create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT);
}
void
struct radv_meta_state *state = &device->meta_state;
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.rc[i].pipeline[j], &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_fragment.rc[i].pipeline[j], &state->alloc);
}
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.depth[i].average_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_fragment.depth[i].average_pipeline,
+ &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.depth[i].max_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_fragment.depth[i].max_pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.depth[i].min_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_fragment.depth[i].min_pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.stencil[i].max_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_fragment.stencil[i].max_pipeline,
+ &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.stencil[i].min_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_fragment.stencil[i].min_pipeline,
+ &state->alloc);
}
- radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_fragment.depth_zero_pipeline,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.stencil_zero_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_fragment.depth_zero_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_fragment.stencil_zero_pipeline, &state->alloc);
- device->vk.dispatch_table.DestroyDescriptorSetLayout(
- radv_device_to_handle(device), state->resolve_fragment.ds_layout, &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_fragment.p_layout,
- &state->alloc);
+ device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->resolve_fragment.ds_layout, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_fragment.p_layout, &state->alloc);
}
static VkPipeline *
}
static void
-emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
- struct radv_image_view *dst_iview, const VkOffset2D *src_offset,
- const VkOffset2D *dst_offset)
+emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview, struct radv_image_view *dst_iview,
+ const VkOffset2D *src_offset, const VkOffset2D *dst_offset)
{
struct radv_device *device = cmd_buffer->device;
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
VkPipeline *pipeline;
radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
- cmd_buffer->device->meta_state.resolve_fragment.p_layout,
- 0, /* set */
- 1, /* descriptorWriteCount */
+ cmd_buffer->device->meta_state.resolve_fragment.p_layout, 0, /* set */
+ 1, /* descriptorWriteCount */
(VkWriteDescriptorSet[]){
{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 0,
src_offset->x - dst_offset->x,
src_offset->y - dst_offset->y,
};
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.resolve_fragment.p_layout, VK_SHADER_STAGE_FRAGMENT_BIT,
- 0, 8, push_constants);
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.resolve_fragment.p_layout,
+ VK_SHADER_STAGE_FRAGMENT_BIT, 0, 8, push_constants);
pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview, dst_iview);
VkPipeline *pipeline;
radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
- cmd_buffer->device->meta_state.resolve_fragment.p_layout,
- 0, /* set */
- 1, /* descriptorWriteCount */
+ cmd_buffer->device->meta_state.resolve_fragment.p_layout, 0, /* set */
+ 1, /* descriptorWriteCount */
(VkWriteDescriptorSet[]){
{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 0,
}
}
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
- *pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
&(VkViewport){.x = resolve_offset->x,
unsigned dst_layout = radv_meta_dst_layout_from_layout(dst_image_layout);
VkImageLayout layout = radv_meta_dst_layout_to_layout(dst_layout);
- radv_meta_save(
- &saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);
- const uint32_t src_base_layer =
- radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);
+ const uint32_t src_base_layer = radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);
- const uint32_t dst_base_layer =
- radv_meta_get_iview_layer(dst_image, &region->dstSubresource, &region->dstOffset);
+ const uint32_t dst_base_layer = radv_meta_get_iview_layer(dst_image, &region->dstSubresource, &region->dstOffset);
const struct VkExtent3D extent = vk_image_sanitize_extent(&src_image->vk, region->extent);
const struct VkOffset3D srcOffset = vk_image_sanitize_offset(&src_image->vk, region->srcOffset);
const struct VkOffset3D dstOffset = vk_image_sanitize_offset(&dst_image->vk, region->dstOffset);
VkRect2D resolve_area = {
- .offset = { dstOffset.x, dstOffset.y },
- .extent = { extent.width, extent.height },
+ .offset = {dstOffset.x, dstOffset.y},
+ .extent = {extent.width, extent.height},
};
radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
}
void
-radv_cmd_buffer_resolve_rendering_fs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *src_iview, VkImageLayout src_layout,
- struct radv_image_view *dst_iview, VkImageLayout dst_layout)
+radv_cmd_buffer_resolve_rendering_fs(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
+ VkImageLayout src_layout, struct radv_image_view *dst_iview,
+ VkImageLayout dst_layout)
{
const struct radv_rendering_state *render = &cmd_buffer->state.render;
struct radv_meta_saved_state saved_state;
radv_meta_save(
&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS |
- RADV_META_SAVE_RENDER);
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_RENDER);
radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
&(VkViewport){.x = resolve_area.offset.x,
* Depth/stencil resolves for the current rendering.
*/
void
-radv_depth_stencil_resolve_rendering_fs(struct radv_cmd_buffer *cmd_buffer,
- VkImageAspectFlags aspects,
+radv_depth_stencil_resolve_rendering_fs(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlags aspects,
VkResolveModeFlagBits resolve_mode)
{
const struct radv_rendering_state *render = &cmd_buffer->state.render;
radv_decompress_resolve_src(cmd_buffer, src_image, src_layout, &region);
radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_DESCRIPTORS |
- RADV_META_SAVE_RENDER);
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_RENDER);
struct radv_image_view *dst_iview = saved_state.render.ds_att.resolve_iview;
.renderArea = saved_state.render.area,
.layerCount = saved_state.render.layer_count,
.viewMask = saved_state.render.view_mask,
- .pDepthAttachment = (dst_iview->image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ?
- &depth_att : NULL,
- .pStencilAttachment = (dst_iview->image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ?
- &stencil_att : NULL,
+ .pDepthAttachment = (dst_iview->image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? &depth_att : NULL,
+ .pStencilAttachment = (dst_iview->image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? &stencil_att : NULL,
};
radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info);
},
0, NULL);
- emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, dst_iview, &resolve_area.offset,
- &resolve_area.extent, aspects, resolve_mode);
+ emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, dst_iview, &resolve_area.offset, &resolve_area.extent, aspects,
+ resolve_mode);
radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer));
struct radv_device;
void radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device,
- const struct radv_pipeline_layout *layout,
- const struct radv_shader_info *info,
+ const struct radv_pipeline_layout *layout, const struct radv_shader_info *info,
const struct radv_shader_args *args);
-void radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level,
- const struct radv_shader_info *info, const struct radv_shader_args *args,
- const struct radv_pipeline_key *pl_key, uint32_t address32_hi);
+void radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level, const struct radv_shader_info *info,
+ const struct radv_shader_args *args, const struct radv_pipeline_key *pl_key,
+ uint32_t address32_hi);
bool radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device);
bool radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_pipeline_stage *vs_stage,
- const struct radv_pipeline_key *pl_key,
- const struct radeon_info *rad_info);
+ const struct radv_pipeline_key *pl_key, const struct radeon_info *rad_info);
bool radv_nir_lower_primitive_shading_rate(nir_shader *nir, enum amd_gfx_level gfx_level);
bool radv_nir_lower_fs_intrinsics(nir_shader *nir, const struct radv_pipeline_stage *fs_stage,
const struct radv_pipeline_key *key);
-bool radv_nir_lower_fs_barycentric(nir_shader *shader, const struct radv_pipeline_key *key,
- unsigned rast_prim);
+bool radv_nir_lower_fs_barycentric(nir_shader *shader, const struct radv_pipeline_key *key, unsigned rast_prim);
bool radv_nir_lower_intrinsics_early(nir_shader *nir, const struct radv_pipeline_key *key);
nir_ssa_def *set_ptr;
if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
- unsigned idx = state->pipeline_layout->set[desc_set].dynamic_offset_start +
- layout->binding[binding].dynamic_offset_offset;
+ unsigned idx =
+ state->pipeline_layout->set[desc_set].dynamic_offset_start + layout->binding[binding].dynamic_offset_offset;
set_ptr = get_scalar_arg(b, 1, state->args->ac.push_constants);
offset = state->pipeline_layout->push_constant_size + idx * 16;
stride = 16;
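/* Dynamic buffer descriptors are stored directly after the push constants,
 * one 16-byte descriptor per dynamic binding, which is where the
 * "push_constant_size + idx * 16" offset and the fixed stride of 16 come
 * from. */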
assert(stride == 16);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_pack_64_2x32_split(b, set_ptr, binding_ptr));
} else {
- nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
- nir_vec3(b, set_ptr, binding_ptr, nir_imm_int(b, stride)));
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_vec3(b, set_ptr, binding_ptr, nir_imm_int(b, stride)));
}
nir_instr_remove(&intrin->instr);
}
static void
-visit_vulkan_resource_reindex(nir_builder *b, apply_layout_state *state,
- nir_intrinsic_instr *intrin)
+visit_vulkan_resource_reindex(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
{
VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
if (desc_type == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_pack_64_2x32_split(b, set_ptr, binding_ptr));
} else {
- assert(desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
- desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+ assert(desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
nir_ssa_def *binding_ptr = nir_channel(b, intrin->src[0].ssa, 1);
nir_ssa_def *stride = nir_channel(b, intrin->src[0].ssa, 2);
binding_ptr = nir_iadd_nuw(b, binding_ptr, index);
- nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
- nir_vector_insert_imm(b, intrin->src[0].ssa, binding_ptr, 1));
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_vector_insert_imm(b, intrin->src[0].ssa, binding_ptr, 1));
}
nir_instr_remove(&intrin->instr);
}
visit_load_vulkan_descriptor(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
{
if (nir_intrinsic_desc_type(intrin) == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
- nir_ssa_def *addr =
- convert_pointer_to_64_bit(b, state,
- nir_iadd(b, nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa),
- nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa)));
+ nir_ssa_def *addr = convert_pointer_to_64_bit(b, state,
+ nir_iadd(b, nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa),
+ nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa)));
nir_ssa_def *desc = nir_build_load_global(b, 1, 64, addr, .access = ACCESS_NON_WRITEABLE);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc);
} else {
- nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
- nir_vector_insert_imm(b, intrin->src[0].ssa, nir_imm_int(b, 0), 2));
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_vector_insert_imm(b, intrin->src[0].ssa, nir_imm_int(b, 0), 2));
}
nir_instr_remove(&intrin->instr);
}
static nir_ssa_def *
load_inline_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *rsrc)
{
- uint32_t desc_type =
- S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+ uint32_t desc_type = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (state->gfx_level >= GFX11) {
- desc_type |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
+ desc_type |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
} else if (state->gfx_level >= GFX10) {
- desc_type |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
+ desc_type |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
+ S_008F0C_RESOURCE_LEVEL(1);
} else {
- desc_type |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ desc_type |=
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
}
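/* The vec4 below assembles a buffer descriptor (V#): dword0 = 32-bit base
 * address, dword1 = BASE_ADDRESS_HI, dword2 = num_records (0xffffffff,
 * i.e. effectively unbounded), dword3 = the dst_sel/format bits built
 * above. (A sketch of the usual AMD V# layout; not spelled out in this
 * patch.) */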
- return nir_vec4(b, rsrc, nir_imm_int(b, S_008F04_BASE_ADDRESS_HI(state->address32_hi)),
- nir_imm_int(b, 0xffffffff), nir_imm_int(b, desc_type));
+ return nir_vec4(b, rsrc, nir_imm_int(b, S_008F04_BASE_ADDRESS_HI(state->address32_hi)), nir_imm_int(b, 0xffffffff),
+ nir_imm_int(b, desc_type));
}
static nir_ssa_def *
-load_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *rsrc,
- unsigned access)
+load_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *rsrc, unsigned access)
{
nir_binding binding = nir_chase_binding(nir_src_for_ssa(rsrc));
* VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK.
*/
if (binding.success) {
- struct radv_descriptor_set_layout *layout =
- state->pipeline_layout->set[binding.desc_set].layout;
+ struct radv_descriptor_set_layout *layout = state->pipeline_layout->set[binding.desc_set].layout;
if (layout->binding[binding.binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
rsrc = nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1));
return load_inline_buffer_descriptor(b, state, rsrc);
nir_ssa_def *ptr = nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1));
ptr = nir_iadd_imm(b, ptr, 8);
ptr = convert_pointer_to_64_bit(b, state, ptr);
- size =
- nir_build_load_global(b, 4, 32, ptr, .access = ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER,
- .align_mul = 16, .align_offset = 4);
+ size = nir_build_load_global(b, 4, 32, ptr, .access = ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER, .align_mul = 16,
+ .align_offset = 4);
} else {
/* load the entire descriptor so it can be CSE'd */
nir_ssa_def *ptr = convert_pointer_to_64_bit(b, state, nir_channel(b, rsrc, 0));
}
static nir_ssa_def *
-get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *deref,
- enum ac_descriptor_type desc_type, bool non_uniform, nir_tex_instr *tex,
- bool write)
+get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *deref, enum ac_descriptor_type desc_type,
+ bool non_uniform, nir_tex_instr *tex, bool write)
{
nir_variable *var = nir_deref_instr_get_variable(deref);
assert(var);
uint32_t dword0_mask = tex->op == nir_texop_tg4 ? C_008F30_TRUNC_COORD : 0xffffffffu;
const uint32_t *samplers = radv_immutable_samplers(layout, binding);
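/* Immutable samplers are stored as four dwords each, so sampler i starts
 * at samplers[i * 4]; dword0 is masked to clear TRUNC_COORD for gather
 * (tg4) ops, presumably because gather needs different coordinate
 * rounding. */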
- return nir_imm_ivec4(b, samplers[constant_index * 4 + 0] & dword0_mask,
- samplers[constant_index * 4 + 1], samplers[constant_index * 4 + 2],
- samplers[constant_index * 4 + 3]);
+ return nir_imm_ivec4(b, samplers[constant_index * 4 + 0] & dword0_mask, samplers[constant_index * 4 + 1],
+ samplers[constant_index * 4 + 2], samplers[constant_index * 4 + 3]);
}
unsigned size = 8;
* use the tail from plane 1 so that we can store only the first 16 bytes
* of the last plane. */
if (desc_type == AC_DESC_PLANE_2) {
- nir_ssa_def *desc2 =
- get_sampler_desc(b, state, deref, AC_DESC_PLANE_1, non_uniform, tex, write);
+ nir_ssa_def *desc2 = get_sampler_desc(b, state, deref, AC_DESC_PLANE_1, non_uniform, tex, write);
nir_ssa_def *comp[8];
for (unsigned i = 0; i < 4; i++)
{
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
const enum glsl_sampler_dim dim = glsl_get_sampler_dim(deref->type);
- bool is_load = intrin->intrinsic == nir_intrinsic_image_deref_load ||
- intrin->intrinsic == nir_intrinsic_image_deref_sparse_load;
+ bool is_load =
+ intrin->intrinsic == nir_intrinsic_image_deref_load || intrin->intrinsic == nir_intrinsic_image_deref_sparse_load;
- nir_ssa_def *desc = get_sampler_desc(
- b, state, deref, dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE,
- nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM, NULL, !is_load);
+ nir_ssa_def *desc = get_sampler_desc(b, state, deref, dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE,
+ nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM, NULL, !is_load);
if (intrin->intrinsic == nir_intrinsic_image_deref_descriptor_amd) {
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc);
if (plane >= 0) {
assert(tex->op != nir_texop_txf_ms && tex->op != nir_texop_samples_identical);
assert(tex->sampler_dim != GLSL_SAMPLER_DIM_BUF);
- image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_PLANE_0 + plane,
- tex->texture_non_uniform, tex, false);
+ image =
+ get_sampler_desc(b, state, texture_deref_instr, AC_DESC_PLANE_0 + plane, tex->texture_non_uniform, tex, false);
} else if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
- image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_BUFFER,
- tex->texture_non_uniform, tex, false);
+ image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_BUFFER, tex->texture_non_uniform, tex, false);
} else if (tex->op == nir_texop_fragment_mask_fetch_amd) {
- image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_FMASK,
- tex->texture_non_uniform, tex, false);
+ image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_FMASK, tex->texture_non_uniform, tex, false);
} else {
- image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_IMAGE,
- tex->texture_non_uniform, tex, false);
+ image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_IMAGE, tex->texture_non_uniform, tex, false);
}
if (sampler_deref_instr) {
- sampler = get_sampler_desc(b, state, sampler_deref_instr, AC_DESC_SAMPLER,
- tex->sampler_non_uniform, tex, false);
+ sampler = get_sampler_desc(b, state, sampler_deref_instr, AC_DESC_SAMPLER, tex->sampler_non_uniform, tex, false);
- if (state->disable_aniso_single_level && tex->sampler_dim < GLSL_SAMPLER_DIM_RECT &&
- state->gfx_level < GFX8) {
+ if (state->disable_aniso_single_level && tex->sampler_dim < GLSL_SAMPLER_DIM_RECT && state->gfx_level < GFX8) {
/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
*
* GFX6-GFX7:
void
radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device,
- const struct radv_pipeline_layout *layout,
- const struct radv_shader_info *info,
+ const struct radv_pipeline_layout *layout, const struct radv_shader_info *info,
const struct radv_shader_args *args)
{
apply_layout_state state = {
#include "radv_shader.h"
#include "radv_shader_args.h"
-#define GET_SGPR_FIELD_NIR(arg, field) \
+#define GET_SGPR_FIELD_NIR(arg, field) \
ac_nir_unpack_arg(b, &s->args->ac, arg, field##__SHIFT, util_bitcount(field##__MASK))
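/* Given a field described by a FIELD__SHIFT/FIELD__MASK pair, this extracts
 * util_bitcount(FIELD__MASK) bits starting at FIELD__SHIFT; e.g. a
 * hypothetical mask of 0x3f00 with shift 8 yields the 6-bit value held in
 * bits [13:8] of the SGPR argument. */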
typedef struct {
static nir_ssa_def *
load_ring(nir_builder *b, unsigned ring, lower_abi_state *s)
{
- struct ac_arg arg = b->shader->info.stage == MESA_SHADER_TASK ? s->args->task_ring_offsets
- : s->args->ac.ring_offsets;
+ struct ac_arg arg =
+ b->shader->info.stage == MESA_SHADER_TASK ? s->args->task_ring_offsets : s->args->ac.ring_offsets;
nir_ssa_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, arg);
- ring_offsets =
- nir_pack_64_2x32_split(b, nir_channel(b, ring_offsets, 0), nir_channel(b, ring_offsets, 1));
+ ring_offsets = nir_pack_64_2x32_split(b, nir_channel(b, ring_offsets, 0), nir_channel(b, ring_offsets, 1));
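/* Each entry in the ring-offsets table is one 4-dword (16-byte) descriptor,
 * so "ring * 16" is the byte offset of the requested ring's descriptor. */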
return nir_load_smem_amd(b, 4, ring_offsets, nir_imm_int(b, ring * 16u), .align_mul = 4u);
}
case nir_intrinsic_load_tcs_num_patches_amd:
if (s->pl_key->dynamic_patch_control_points) {
if (stage == MESA_SHADER_TESS_CTRL) {
- replacement = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout,
- TCS_OFFCHIP_LAYOUT_NUM_PATCHES);
+ replacement = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_PATCHES);
} else {
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->tes_num_patches);
}
replacement = load_ring(b, RING_PS_ATTR, s);
/* Note, the HW always assumes there is at least 1 per-vertex param. */
- const unsigned total_num_params =
- MAX2(1, s->info->outinfo.param_exports) + s->info->outinfo.prim_param_exports;
+ const unsigned total_num_params = MAX2(1, s->info->outinfo.param_exports) + s->info->outinfo.prim_param_exports;
nir_ssa_def *dword1 = nir_channel(b, replacement, 1);
dword1 = nir_ior_imm(b, dword1, S_008F04_STRIDE(16 * total_num_params));
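/* Each exported param occupies one 16-byte vec4 slot, so the attribute
 * ring stride is 16 bytes per param. */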
case nir_intrinsic_load_ring_attr_offset_amd: {
nir_ssa_def *ring_attr_offset = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_attr_offset);
- replacement =
- nir_ishl_imm(b, nir_ubfe_imm(b, ring_attr_offset, 0, 15),
- 9); /* 512b increments. */
+ replacement = nir_ishl_imm(b, nir_ubfe_imm(b, ring_attr_offset, 0, 15), 9); /* 512b increments. */
break;
}
case nir_intrinsic_load_tess_rel_patch_id_amd:
if (stage == MESA_SHADER_TESS_CTRL) {
- replacement = nir_extract_u8(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_rel_ids),
- nir_imm_int(b, 0));
+ replacement = nir_extract_u8(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_rel_ids), nir_imm_int(b, 0));
} else if (stage == MESA_SHADER_TESS_EVAL) {
/* Setting an upper bound like this will actually make it possible
* to optimize some multiplications (in address calculations) so that
* constant additions can be folded into the const offset of memory load instructions.
*/
nir_ssa_def *arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tes_rel_patch_id);
nir_intrinsic_instr *load_arg = nir_instr_as_intrinsic(arg->parent_instr);
- nir_intrinsic_set_arg_upper_bound_u32_amd(
- load_arg, 2048 / MAX2(b->shader->info.tess.tcs_vertices_out, 1));
+ nir_intrinsic_set_arg_upper_bound_u32_amd(load_arg, 2048 / MAX2(b->shader->info.tess.tcs_vertices_out, 1));
replacement = arg;
} else {
unreachable("invalid tessellation shader stage");
case nir_intrinsic_load_patch_vertices_in:
if (stage == MESA_SHADER_TESS_CTRL) {
if (s->pl_key->dynamic_patch_control_points) {
- replacement = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout,
- TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS);
+ replacement = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS);
} else {
replacement = nir_imm_int(b, s->pl_key->tcs.tess_input_vertices);
}
unreachable("invalid tessellation shader stage");
break;
case nir_intrinsic_load_gs_vertex_offset_amd:
- replacement =
- ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[nir_intrinsic_base(intrin)]);
+ replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[nir_intrinsic_base(intrin)]);
break;
case nir_intrinsic_load_workgroup_num_input_vertices_amd:
- replacement = nir_ubfe_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info),
- 12, 9);
+ replacement = nir_ubfe_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 12, 9);
break;
case nir_intrinsic_load_workgroup_num_input_primitives_amd:
- replacement = nir_ubfe_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info),
- 22, 9);
+ replacement = nir_ubfe_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 22, 9);
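/* gs_tg_info packs both workgroup input counts as 9-bit fields: vertices
 * in bits [20:12] and primitives in bits [30:22], matching the two ubfe
 * extracts above. */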
break;
case nir_intrinsic_load_packed_passthrough_primitive_amd:
/* NGG passthrough mode: the HW already packs the primitive export value to a single register.
* so the below is equivalent to: "ult(ubfe(gs_tg_info, 22, 9), 16)", but
* ACO can optimize out the comparison to zero (see try_optimize_scc_nocompare).
*/
- nir_ssa_def *small_workgroup =
- nir_ieq_imm(b, nir_iand_imm(b, gs_tg_info, BITFIELD_RANGE(22 + 4, 9 - 4)), 0);
+ nir_ssa_def *small_workgroup = nir_ieq_imm(b, nir_iand_imm(b, gs_tg_info, BITFIELD_RANGE(22 + 4, 9 - 4)), 0);
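/* BITFIELD_RANGE(22 + 4, 9 - 4) covers the top 5 bits of the 9-bit
 * primitive count at bit 22; those bits being zero is exactly the
 * "count < 16" test mentioned above. */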
- nir_ssa_def *mask = nir_bcsel(
- b, small_workgroup, nir_imm_int(b, radv_nggc_none),
- nir_imm_int(b, radv_nggc_front_face | radv_nggc_back_face | radv_nggc_small_primitives));
+ nir_ssa_def *mask =
+ nir_bcsel(b, small_workgroup, nir_imm_int(b, radv_nggc_none),
+ nir_imm_int(b, radv_nggc_front_face | radv_nggc_back_face | radv_nggc_small_primitives));
nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
replacement = nir_ine_imm(b, nir_iand(b, settings, mask), 0);
break;
break;
case nir_intrinsic_load_ring_mesh_scratch_offset_amd:
/* gs_tg_info[0:11] is ordered_wave_id. Multiply by the ring entry size. */
- replacement = nir_imul_imm(
- b, nir_iand_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 0xfff),
- RADV_MESH_SCRATCH_ENTRY_BYTES);
+ replacement = nir_imul_imm(b, nir_iand_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 0xfff),
+ RADV_MESH_SCRATCH_ENTRY_BYTES);
break;
case nir_intrinsic_load_task_ring_entry_amd:
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.task_ring_entry);
break;
case nir_intrinsic_load_lshs_vertex_stride_amd: {
- unsigned io_num = stage == MESA_SHADER_VERTEX ? s->info->vs.num_linked_outputs
- : s->info->tcs.num_linked_inputs;
+ unsigned io_num = stage == MESA_SHADER_VERTEX ? s->info->vs.num_linked_outputs : s->info->tcs.num_linked_inputs;
replacement = nir_imm_int(b, get_tcs_input_vertex_stride(io_num));
break;
}
case nir_intrinsic_load_esgs_vertex_stride_amd: {
/* Emulate VGT_ESGS_RING_ITEMSIZE on GFX9+ to reduce context register writes. */
assert(s->gfx_level >= GFX9);
- const unsigned stride = s->info->is_ngg ? s->info->ngg_info.vgt_esgs_ring_itemsize
- : s->info->gs_ring_info.vgt_esgs_ring_itemsize;
+ const unsigned stride =
+ s->info->is_ngg ? s->info->ngg_info.vgt_esgs_ring_itemsize : s->info->gs_ring_info.vgt_esgs_ring_itemsize;
replacement = nir_imm_int(b, stride);
break;
}
case nir_intrinsic_load_hs_out_patch_data_offset_amd: {
unsigned out_vertices_per_patch = b->shader->info.tess.tcs_vertices_out;
- unsigned num_tcs_outputs = stage == MESA_SHADER_TESS_CTRL ? s->info->tcs.num_linked_outputs
- : s->info->tes.num_linked_inputs;
+ unsigned num_tcs_outputs =
+ stage == MESA_SHADER_TESS_CTRL ? s->info->tcs.num_linked_outputs : s->info->tes.num_linked_inputs;
int per_vertex_output_patch_size = out_vertices_per_patch * num_tcs_outputs * 16u;
if (s->pl_key->dynamic_patch_control_points) {
nir_ssa_def *num_patches;
if (stage == MESA_SHADER_TESS_CTRL) {
- num_patches = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout,
- TCS_OFFCHIP_LAYOUT_NUM_PATCHES);
+ num_patches = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_PATCHES);
} else {
num_patches = ac_nir_load_arg(b, &s->args->ac, s->args->tes_num_patches);
}
offset = nir_iadd(b, offset, nir_ishl_imm(b, intrin->src[1].ssa, 3));
}
- replacement = nir_load_global_amd(b, 2, 32, addr, offset, .base = sample_pos_offset,
- .access = ACCESS_NON_WRITEABLE);
+ replacement =
+ nir_load_global_amd(b, 2, 32, addr, offset, .base = sample_pos_offset, .access = ACCESS_NON_WRITEABLE);
break;
}
case nir_intrinsic_load_rasterization_samples_amd:
break;
}
case nir_intrinsic_atomic_add_gs_emit_prim_count_amd:
- nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa,
- nir_imm_int(b, RADV_NGG_QUERY_PIPELINE_STAT_OFFSET),
+ nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa, nir_imm_int(b, RADV_NGG_QUERY_PIPELINE_STAT_OFFSET),
nir_imm_int(b, 0x100));
break;
case nir_intrinsic_atomic_add_gen_prim_count_amd:
- nir_gds_atomic_add_amd(
- b, 32, intrin->src[0].ssa,
- nir_imm_int(b, RADV_NGG_QUERY_PRIM_GEN_OFFSET(nir_intrinsic_stream_id(intrin))),
- nir_imm_int(b, 0x100));
+ nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa,
+ nir_imm_int(b, RADV_NGG_QUERY_PRIM_GEN_OFFSET(nir_intrinsic_stream_id(intrin))),
+ nir_imm_int(b, 0x100));
break;
case nir_intrinsic_atomic_add_xfb_prim_count_amd:
- nir_gds_atomic_add_amd(
- b, 32, intrin->src[0].ssa,
- nir_imm_int(b, RADV_NGG_QUERY_PRIM_XFB_OFFSET(nir_intrinsic_stream_id(intrin))),
- nir_imm_int(b, 0x100));
+ nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa,
+ nir_imm_int(b, RADV_NGG_QUERY_PRIM_XFB_OFFSET(nir_intrinsic_stream_id(intrin))),
+ nir_imm_int(b, 0x100));
break;
case nir_intrinsic_atomic_add_gs_invocation_count_amd:
/* TODO: add gs invocation query emulation. */
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.streamout_write_index);
break;
case nir_intrinsic_load_streamout_buffer_amd: {
- nir_ssa_def *ptr =
- nir_pack_64_2x32_split(b, ac_nir_load_arg(b, &s->args->ac, s->args->streamout_buffers),
- nir_imm_int(b, s->address32_hi));
+ nir_ssa_def *ptr = nir_pack_64_2x32_split(b, ac_nir_load_arg(b, &s->args->ac, s->args->streamout_buffers),
+ nir_imm_int(b, s->address32_hi));
replacement = nir_load_smem_amd(b, 4, ptr, nir_imm_int(b, nir_intrinsic_base(intrin) * 16));
break;
}
case nir_intrinsic_load_streamout_offset_amd:
- replacement =
- ac_nir_load_arg(b, &s->args->ac, s->args->ac.streamout_offset[nir_intrinsic_base(intrin)]);
+ replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.streamout_offset[nir_intrinsic_base(intrin)]);
break;
case nir_intrinsic_load_lds_ngg_gs_out_vertex_base_amd:
}
case nir_intrinsic_load_poly_line_smooth_enabled:
if (s->pl_key->dynamic_line_rast_mode) {
- nir_ssa_def *line_rast_mode =
- GET_SGPR_FIELD_NIR(s->args->ps_state, PS_STATE_LINE_RAST_MODE);
- replacement =
- nir_ieq_imm(b, line_rast_mode, VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT);
+ nir_ssa_def *line_rast_mode = GET_SGPR_FIELD_NIR(s->args->ps_state, PS_STATE_LINE_RAST_MODE);
+ replacement = nir_ieq_imm(b, line_rast_mode, VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT);
} else {
replacement = nir_imm_bool(b, s->pl_key->ps.line_smooth_enabled);
}
assert(stride < (1 << 14));
if (stream_offset) {
- nir_ssa_def *addr =
- nir_pack_64_2x32_split(b, nir_channel(b, ring, 0), nir_channel(b, ring, 1));
+ nir_ssa_def *addr = nir_pack_64_2x32_split(b, nir_channel(b, ring, 0), nir_channel(b, ring, 1));
addr = nir_iadd_imm(b, addr, stream_offset);
ring = nir_vector_insert_imm(b, ring, nir_unpack_64_2x32_split_x(b, addr), 0);
ring = nir_vector_insert_imm(b, ring, nir_unpack_64_2x32_split_y(b, addr), 1);
}
- ring = nir_vector_insert_imm(
- b, ring, nir_ior_imm(b, nir_channel(b, ring, 1), S_008F04_STRIDE(stride)), 1);
+ ring = nir_vector_insert_imm(b, ring, nir_ior_imm(b, nir_channel(b, ring, 1), S_008F04_STRIDE(stride)), 1);
return nir_vector_insert_imm(b, ring, nir_imm_int(b, s->info->wave_size), 2);
}
void
-radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level,
- const struct radv_shader_info *info, const struct radv_shader_args *args,
- const struct radv_pipeline_key *pl_key, uint32_t address32_hi)
+radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level, const struct radv_shader_info *info,
+ const struct radv_shader_args *args, const struct radv_pipeline_key *pl_key, uint32_t address32_hi)
{
lower_abi_state state = {
.gfx_level = gfx_level,
state.gsvs_ring[i] = load_gsvs_ring(&b, &state, i);
}
- nir_shader_instructions_pass(shader, lower_abi_instr,
- nir_metadata_dominance | nir_metadata_block_index, &state);
+ nir_shader_instructions_pass(shader, lower_abi_instr, nir_metadata_dominance | nir_metadata_block_index, &state);
}
{
nir_ssa_def *pull_model = nir_load_barycentric_model(b, 32);
- nir_ssa_def *deriv_x = nir_vec3(b, nir_fddx_fine(b, nir_channel(b, pull_model, 0)),
- nir_fddx_fine(b, nir_channel(b, pull_model, 1)),
- nir_fddx_fine(b, nir_channel(b, pull_model, 2)));
- nir_ssa_def *deriv_y = nir_vec3(b, nir_fddy_fine(b, nir_channel(b, pull_model, 0)),
- nir_fddy_fine(b, nir_channel(b, pull_model, 1)),
- nir_fddy_fine(b, nir_channel(b, pull_model, 2)));
+ nir_ssa_def *deriv_x =
+ nir_vec3(b, nir_fddx_fine(b, nir_channel(b, pull_model, 0)), nir_fddx_fine(b, nir_channel(b, pull_model, 1)),
+ nir_fddx_fine(b, nir_channel(b, pull_model, 2)));
+ nir_ssa_def *deriv_y =
+ nir_vec3(b, nir_fddy_fine(b, nir_channel(b, pull_model, 0)), nir_fddy_fine(b, nir_channel(b, pull_model, 1)),
+ nir_fddy_fine(b, nir_channel(b, pull_model, 2)));
nir_ssa_def *offset_x = nir_channel(b, offset, 0);
nir_ssa_def *offset_y = nir_channel(b, offset, 1);
}
static nir_ssa_def *
-lower_barycentric_coord_at_sample(nir_builder *b, lower_fs_barycentric_state *state,
- nir_intrinsic_instr *intrin)
+lower_barycentric_coord_at_sample(nir_builder *b, lower_fs_barycentric_state *state, nir_intrinsic_instr *intrin)
{
const enum glsl_interp_mode mode = (enum glsl_interp_mode)nir_intrinsic_interp_mode(intrin);
nir_ssa_def *num_samples = nir_load_rasterization_samples_amd(b);
nir_push_if(b, nir_ieq_imm(b, num_samples, 1));
{
- res1 = nir_load_barycentric_pixel(
- b, 32, .interp_mode = nir_intrinsic_interp_mode(intrin));
+ res1 = nir_load_barycentric_pixel(b, 32, .interp_mode = nir_intrinsic_interp_mode(intrin));
}
nir_push_else(b, NULL);
{
- nir_ssa_def *sample_pos =
- nir_load_sample_positions_amd(b, 32, intrin->src[0].ssa, num_samples);
+ nir_ssa_def *sample_pos = nir_load_sample_positions_amd(b, 32, intrin->src[0].ssa, num_samples);
/* sample_pos -= 0.5 */
sample_pos = nir_fadd_imm(b, sample_pos, -0.5f);
new_dest = nir_if_phi(b, res1, res2);
} else {
if (!state->num_rasterization_samples) {
- new_dest = nir_load_barycentric_pixel(
- b, 32, .interp_mode = nir_intrinsic_interp_mode(intrin));
+ new_dest = nir_load_barycentric_pixel(b, 32, .interp_mode = nir_intrinsic_interp_mode(intrin));
} else {
- nir_ssa_def *sample_pos =
- nir_load_sample_positions_amd(b, 32, intrin->src[0].ssa, num_samples);
+ nir_ssa_def *sample_pos = nir_load_sample_positions_amd(b, 32, intrin->src[0].ssa, num_samples);
/* sample_pos -= 0.5 */
sample_pos = nir_fadd_imm(b, sample_pos, -0.5f);
*/
nir_ssa_def *quad_id = nir_ushr_imm(b, nir_load_subgroup_invocation(b), 2);
nir_ssa_def *provoking_vtx = nir_load_provoking_vtx_amd(b);
- nir_ssa_def *provoking_vtx_id =
- nir_ubfe(b, provoking_vtx, nir_ishl_imm(b, quad_id, 1), nir_imm_int(b, 2));
+ nir_ssa_def *provoking_vtx_id = nir_ubfe(b, provoking_vtx, nir_ishl_imm(b, quad_id, 1), nir_imm_int(b, 2));
/* Compute barycentrics. */
v0_bary[0] = nir_fsub(b, nir_fsub(b, nir_imm_float(b, 1.0f), p2), p1);
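/* Barycentric coordinates sum to 1, so only p1 and p2 are needed and the
 * first coordinate is reconstructed as 1.0 - p2 - p1. */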
}
static bool
-lower_load_barycentric_coord(nir_builder *b, lower_fs_barycentric_state *state,
- nir_intrinsic_instr *intrin)
+lower_load_barycentric_coord(nir_builder *b, lower_fs_barycentric_state *state, nir_intrinsic_instr *intrin)
{
nir_ssa_def *interp, *p1, *p2;
nir_ssa_def *new_dest;
}
bool
-radv_nir_lower_fs_barycentric(nir_shader *shader, const struct radv_pipeline_key *key,
- unsigned rast_prim)
+radv_nir_lower_fs_barycentric(nir_shader *shader, const struct radv_pipeline_key *key, unsigned rast_prim)
{
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
bool progress = false;
switch (intrin->intrinsic) {
case nir_intrinsic_load_sample_mask_in: {
- nir_ssa_def *sample_coverage =
- nir_load_vector_arg_amd(&b, 1, .base = args->ac.sample_coverage.arg_index);
+ nir_ssa_def *sample_coverage = nir_load_vector_arg_amd(&b, 1, .base = args->ac.sample_coverage.arg_index);
nir_ssa_def *def = NULL;
if (info->ps.uses_sample_shading || key->ps.sample_shading_enable) {
/* gl_SampleMaskIn[0] = (SampleCoverage & (PsIterMask << gl_SampleID)). */
- nir_ssa_def *ps_state =
- nir_load_scalar_arg_amd(&b, 1, .base = args->ps_state.arg_index);
- nir_ssa_def *ps_iter_mask = nir_ubfe_imm(&b, ps_state, PS_STATE_PS_ITER_MASK__SHIFT,
- util_bitcount(PS_STATE_PS_ITER_MASK__MASK));
+ nir_ssa_def *ps_state = nir_load_scalar_arg_amd(&b, 1, .base = args->ps_state.arg_index);
+ nir_ssa_def *ps_iter_mask =
+ nir_ubfe_imm(&b, ps_state, PS_STATE_PS_ITER_MASK__SHIFT, util_bitcount(PS_STATE_PS_ITER_MASK__MASK));
nir_ssa_def *sample_id = nir_load_sample_id(&b);
def = nir_iand(&b, sample_coverage, nir_ishl(&b, ps_iter_mask, sample_id));
} else {
adjusted_frag_z = nir_ffma_imm1(&b, adjusted_frag_z, 0.0625f, frag_z);
/* VRS Rate X = Ancillary[2:3] */
- nir_ssa_def *ancillary =
- nir_load_vector_arg_amd(&b, 1, .base = args->ac.ancillary.arg_index);
+ nir_ssa_def *ancillary = nir_load_vector_arg_amd(&b, 1, .base = args->ac.ancillary.arg_index);
nir_ssa_def *x_rate = nir_ubfe_imm(&b, ancillary, 2, 2);
/* xRate = xRate == 0x1 ? adjusted_frag_z : frag_z. */
nir_push_if(&b, nir_ieq_imm(&b, num_samples, 1));
{
- res1 = nir_load_barycentric_pixel(
- &b, 32, .interp_mode = nir_intrinsic_interp_mode(intrin));
+ res1 = nir_load_barycentric_pixel(&b, 32, .interp_mode = nir_intrinsic_interp_mode(intrin));
}
nir_push_else(&b, NULL);
{
- nir_ssa_def *sample_pos =
- nir_load_sample_positions_amd(&b, 32, intrin->src[0].ssa, num_samples);
+ nir_ssa_def *sample_pos = nir_load_sample_positions_amd(&b, 32, intrin->src[0].ssa, num_samples);
/* sample_pos -= 0.5 */
sample_pos = nir_fadd_imm(&b, sample_pos, -0.5f);
- res2 = nir_load_barycentric_at_offset(
- &b, 32, sample_pos, .interp_mode = nir_intrinsic_interp_mode(intrin));
+ res2 = nir_load_barycentric_at_offset(&b, 32, sample_pos,
+ .interp_mode = nir_intrinsic_interp_mode(intrin));
}
nir_pop_if(&b, NULL);
new_dest = nir_if_phi(&b, res1, res2);
} else {
if (!key->ps.num_samples) {
- new_dest = nir_load_barycentric_pixel(
- &b, 32, .interp_mode = nir_intrinsic_interp_mode(intrin));
+ new_dest = nir_load_barycentric_pixel(&b, 32, .interp_mode = nir_intrinsic_interp_mode(intrin));
} else {
- nir_ssa_def *sample_pos =
- nir_load_sample_positions_amd(&b, 32, intrin->src[0].ssa, num_samples);
+ nir_ssa_def *sample_pos = nir_load_sample_positions_amd(&b, 32, intrin->src[0].ssa, num_samples);
/* sample_pos -= 0.5 */
sample_pos = nir_fadd_imm(&b, sample_pos, -0.5f);
- new_dest = nir_load_barycentric_at_offset(
- &b, 32, sample_pos, .interp_mode = nir_intrinsic_interp_mode(intrin));
+ new_dest = nir_load_barycentric_at_offset(&b, 32, sample_pos,
+ .interp_mode = nir_intrinsic_interp_mode(intrin));
}
}
NIR_PASS(progress, nir, nir_lower_array_deref_of_vec, mask,
nir_lower_direct_array_deref_of_vec_load | nir_lower_indirect_array_deref_of_vec_load |
- nir_lower_direct_array_deref_of_vec_store |
- nir_lower_indirect_array_deref_of_vec_store);
+ nir_lower_direct_array_deref_of_vec_store | nir_lower_indirect_array_deref_of_vec_store);
NIR_PASS(progress, nir, nir_lower_io_to_scalar_early, mask);
if (progress) {
/* Optimize the new vector code and then remove dead vars */
NIR_PASS(_, nir, nir_opt_copy_prop_vars);
NIR_PASS(_, nir, nir_opt_dce);
- NIR_PASS(_, nir, nir_remove_dead_variables,
- nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, NULL);
+ NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, NULL);
}
}
if (nir->info.stage == MESA_SHADER_VERTEX) {
NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in, type_size_vec4, 0);
- NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out, type_size_vec4,
- nir_lower_io_lower_64bit_to_32);
+ NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32);
} else {
NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4,
nir_lower_io_lower_64bit_to_32);
info->vs.tcs_temp_only_input_mask);
return true;
} else if (info->vs.as_es) {
- NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, NULL,
- device->physical_device->rad_info.gfx_level, info->esgs_itemsize);
+ NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, NULL, device->physical_device->rad_info.gfx_level,
+ info->esgs_itemsize);
return true;
}
} else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, NULL, info->vs.tcs_in_out_eq);
- NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, NULL,
- device->physical_device->rad_info.gfx_level, info->tcs.tes_reads_tess_factors,
- info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read,
- info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs, info->wave_size,
- false, false, true);
+ NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, NULL, device->physical_device->rad_info.gfx_level,
+ info->tcs.tes_reads_tess_factors, info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read,
+ info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs, info->wave_size, false, false, true);
return true;
} else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
NIR_PASS_V(nir, ac_nir_lower_tes_inputs_to_mem, NULL);
if (info->tes.as_es) {
- NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, NULL,
- device->physical_device->rad_info.gfx_level, info->esgs_itemsize);
+ NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, NULL, device->physical_device->rad_info.gfx_level,
+ info->esgs_itemsize);
}
return true;
} else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
- NIR_PASS_V(nir, ac_nir_lower_gs_inputs_to_mem, NULL,
- device->physical_device->rad_info.gfx_level, false);
+ NIR_PASS_V(nir, ac_nir_lower_gs_inputs_to_mem, NULL, device->physical_device->rad_info.gfx_level, false);
return true;
} else if (nir->info.stage == MESA_SHADER_TASK) {
ac_nir_lower_task_outputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES,
device->physical_device->task_info.num_entries);
return true;
} else if (nir->info.stage == MESA_SHADER_MESH) {
- ac_nir_lower_mesh_inputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES,
- device->physical_device->task_info.num_entries);
+ ac_nir_lower_mesh_inputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, device->physical_device->task_info.num_entries);
return true;
}
continue;
nir_variable *var = nir_intrinsic_get_var(intr, 0);
- if (var->data.mode != nir_var_shader_out ||
- var->data.location != VARYING_SLOT_PRIMITIVE_SHADING_RATE)
+ if (var->data.mode != nir_var_shader_out || var->data.location != VARYING_SLOT_PRIMITIVE_SHADING_RATE)
continue;
b.cursor = nir_before_instr(instr);
y_rate_shift += 26;
}
- out = nir_ior(&b, nir_ishl_imm(&b, x_rate, x_rate_shift),
- nir_ishl_imm(&b, y_rate, y_rate_shift));
+ out = nir_ior(&b, nir_ishl_imm(&b, x_rate, x_rate_shift), nir_ishl_imm(&b, y_rate, y_rate_shift));
nir_instr_rewrite_src(&intr->instr, &intr->src[1], nir_src_for_ssa(out));
} rq_variable;
static rq_variable *
-rq_variable_create(void *ctx, nir_shader *shader, unsigned array_length,
- const struct glsl_type *type, const char *name)
+rq_variable_create(void *ctx, nir_shader *shader, unsigned array_length, const struct glsl_type *type, const char *name)
{
rq_variable *result = ralloc(ctx, rq_variable);
result->array_length = array_length;
}
static void
-nir_store_array(nir_builder *b, nir_variable *array, nir_ssa_def *index, nir_ssa_def *value,
- unsigned writemask)
+nir_store_array(nir_builder *b, nir_variable *array, nir_ssa_def *index, nir_ssa_def *value, unsigned writemask)
{
- nir_store_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, array), index), value,
- writemask);
+ nir_store_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, array), index), value, writemask);
}
static nir_deref_instr *
}
static void
-rq_store_var(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def *value,
- unsigned writemask)
+rq_store_var(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def *value, unsigned writemask)
{
if (var->array_length == 1) {
nir_store_var(b, var->variable, value, writemask);
return nir_load_array(b, var->variable, array_index);
return nir_load_deref(
- b,
- nir_build_deref_array(
- b, nir_build_deref_array(b, nir_build_deref_var(b, var->variable), index), array_index));
+ b, nir_build_deref_array(b, nir_build_deref_array(b, nir_build_deref_var(b, var->variable), index), array_index));
}
static void
-rq_store_array(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def *array_index,
- nir_ssa_def *value, unsigned writemask)
+rq_store_array(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def *array_index, nir_ssa_def *value,
+ unsigned writemask)
{
if (var->array_length == 1) {
nir_store_array(b, var->variable, array_index, value, writemask);
} else {
nir_store_deref(
b,
- nir_build_deref_array(
- b, nir_build_deref_array(b, nir_build_deref_var(b, var->variable), index), array_index),
+ nir_build_deref_array(b, nir_build_deref_array(b, nir_build_deref_var(b, var->variable), index), array_index),
value, writemask);
}
}
uint32_t stack_entries;
};
-#define VAR_NAME(name) \
- strcat(strcpy(ralloc_size(ctx, strlen(base_name) + strlen(name) + 1), base_name), name)
+#define VAR_NAME(name) strcat(strcpy(ralloc_size(ctx, strlen(base_name) + strlen(name) + 1), base_name), name)
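/* VAR_NAME ralloc's strlen(base_name) + strlen(name) + 1 bytes, copies
 * base_name in and appends name; e.g. a base_name of "rq" with "_stack"
 * produces "rq_stack". */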
static struct ray_query_traversal_vars
-init_ray_query_traversal_vars(void *ctx, nir_shader *shader, unsigned array_length,
- const char *base_name)
+init_ray_query_traversal_vars(void *ctx, nir_shader *shader, unsigned array_length, const char *base_name)
{
struct ray_query_traversal_vars result;
const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
result.origin = rq_variable_create(ctx, shader, array_length, vec3_type, VAR_NAME("_origin"));
- result.direction =
- rq_variable_create(ctx, shader, array_length, vec3_type, VAR_NAME("_direction"));
-
- result.bvh_base =
- rq_variable_create(ctx, shader, array_length, glsl_uint64_t_type(), VAR_NAME("_bvh_base"));
- result.stack =
- rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_stack"));
- result.top_stack =
- rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_top_stack"));
- result.stack_low_watermark = rq_variable_create(ctx, shader, array_length, glsl_uint_type(),
- VAR_NAME("_stack_low_watermark"));
- result.current_node =
- rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_current_node"));
- result.previous_node =
- rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_previous_node"));
- result.instance_top_node = rq_variable_create(ctx, shader, array_length, glsl_uint_type(),
- VAR_NAME("_instance_top_node"));
- result.instance_bottom_node = rq_variable_create(ctx, shader, array_length, glsl_uint_type(),
- VAR_NAME("_instance_bottom_node"));
+ result.direction = rq_variable_create(ctx, shader, array_length, vec3_type, VAR_NAME("_direction"));
+
+ result.bvh_base = rq_variable_create(ctx, shader, array_length, glsl_uint64_t_type(), VAR_NAME("_bvh_base"));
+ result.stack = rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_stack"));
+ result.top_stack = rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_top_stack"));
+ result.stack_low_watermark =
+ rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_stack_low_watermark"));
+ result.current_node = rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_current_node"));
+ result.previous_node = rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_previous_node"));
+ result.instance_top_node =
+ rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_instance_top_node"));
+ result.instance_bottom_node =
+ rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_instance_bottom_node"));
return result;
}
static struct ray_query_intersection_vars
-init_ray_query_intersection_vars(void *ctx, nir_shader *shader, unsigned array_length,
- const char *base_name)
+init_ray_query_intersection_vars(void *ctx, nir_shader *shader, unsigned array_length, const char *base_name)
{
struct ray_query_intersection_vars result;
const struct glsl_type *vec2_type = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
- result.primitive_id =
- rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_primitive_id"));
- result.geometry_id_and_flags = rq_variable_create(ctx, shader, array_length, glsl_uint_type(),
- VAR_NAME("_geometry_id_and_flags"));
- result.instance_addr = rq_variable_create(ctx, shader, array_length, glsl_uint64_t_type(),
- VAR_NAME("_instance_addr"));
- result.intersection_type = rq_variable_create(ctx, shader, array_length, glsl_uint_type(),
- VAR_NAME("_intersection_type"));
- result.opaque =
- rq_variable_create(ctx, shader, array_length, glsl_bool_type(), VAR_NAME("_opaque"));
- result.frontface =
- rq_variable_create(ctx, shader, array_length, glsl_bool_type(), VAR_NAME("_frontface"));
- result.sbt_offset_and_flags = rq_variable_create(ctx, shader, array_length, glsl_uint_type(),
- VAR_NAME("_sbt_offset_and_flags"));
- result.barycentrics =
- rq_variable_create(ctx, shader, array_length, vec2_type, VAR_NAME("_barycentrics"));
+ result.primitive_id = rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_primitive_id"));
+ result.geometry_id_and_flags =
+ rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_geometry_id_and_flags"));
+ result.instance_addr =
+ rq_variable_create(ctx, shader, array_length, glsl_uint64_t_type(), VAR_NAME("_instance_addr"));
+ result.intersection_type =
+ rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_intersection_type"));
+ result.opaque = rq_variable_create(ctx, shader, array_length, glsl_bool_type(), VAR_NAME("_opaque"));
+ result.frontface = rq_variable_create(ctx, shader, array_length, glsl_bool_type(), VAR_NAME("_frontface"));
+ result.sbt_offset_and_flags =
+ rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_sbt_offset_and_flags"));
+ result.barycentrics = rq_variable_create(ctx, shader, array_length, vec2_type, VAR_NAME("_barycentrics"));
result.t = rq_variable_create(ctx, shader, array_length, glsl_float_type(), VAR_NAME("_t"));
return result;
}
static void
-init_ray_query_vars(nir_shader *shader, unsigned array_length, struct ray_query_vars *dst,
- const char *base_name, uint32_t max_shared_size)
+init_ray_query_vars(nir_shader *shader, unsigned array_length, struct ray_query_vars *dst, const char *base_name,
+ uint32_t max_shared_size)
{
void *ctx = dst;
const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
- dst->root_bvh_base = rq_variable_create(dst, shader, array_length, glsl_uint64_t_type(),
- VAR_NAME("_root_bvh_base"));
+ dst->root_bvh_base = rq_variable_create(dst, shader, array_length, glsl_uint64_t_type(), VAR_NAME("_root_bvh_base"));
dst->flags = rq_variable_create(dst, shader, array_length, glsl_uint_type(), VAR_NAME("_flags"));
- dst->cull_mask =
- rq_variable_create(dst, shader, array_length, glsl_uint_type(), VAR_NAME("_cull_mask"));
+ dst->cull_mask = rq_variable_create(dst, shader, array_length, glsl_uint_type(), VAR_NAME("_cull_mask"));
dst->origin = rq_variable_create(dst, shader, array_length, vec3_type, VAR_NAME("_origin"));
dst->tmin = rq_variable_create(dst, shader, array_length, glsl_float_type(), VAR_NAME("_tmin"));
- dst->direction =
- rq_variable_create(dst, shader, array_length, vec3_type, VAR_NAME("_direction"));
+ dst->direction = rq_variable_create(dst, shader, array_length, vec3_type, VAR_NAME("_direction"));
- dst->incomplete =
- rq_variable_create(dst, shader, array_length, glsl_bool_type(), VAR_NAME("_incomplete"));
+ dst->incomplete = rq_variable_create(dst, shader, array_length, glsl_bool_type(), VAR_NAME("_incomplete"));
dst->closest = init_ray_query_intersection_vars(dst, shader, array_length, VAR_NAME("_closest"));
- dst->candidate =
- init_ray_query_intersection_vars(dst, shader, array_length, VAR_NAME("_candidate"));
+ dst->candidate = init_ray_query_intersection_vars(dst, shader, array_length, VAR_NAME("_candidate"));
dst->trav = init_ray_query_traversal_vars(dst, shader, array_length, VAR_NAME("_top"));
- uint32_t workgroup_size = shader->info.workgroup_size[0] * shader->info.workgroup_size[1] *
- shader->info.workgroup_size[2];
+ uint32_t workgroup_size =
+ shader->info.workgroup_size[0] * shader->info.workgroup_size[1] * shader->info.workgroup_size[2];
uint32_t shared_stack_entries = shader->info.ray_queries == 1 ? 16 : 8;
uint32_t shared_stack_size = workgroup_size * shared_stack_entries * 4;
uint32_t shared_offset = align(shader->info.shared_size, 4);
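/* Example: a 64-invocation workgroup using a single ray query reserves
 * 64 * 16 * 4 = 4096 bytes of LDS for the shared traversal stack; with
 * multiple queries each one gets the smaller 8-entry stack instead. */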
if (shader->info.stage != MESA_SHADER_COMPUTE || array_length > 1 ||
shared_offset + shared_stack_size > max_shared_size) {
- dst->stack = rq_variable_create(
- dst, shader, array_length,
- glsl_array_type(glsl_uint_type(), MAX_SCRATCH_STACK_ENTRY_COUNT, 0), VAR_NAME("_stack"));
+ dst->stack =
+ rq_variable_create(dst, shader, array_length,
+ glsl_array_type(glsl_uint_type(), MAX_SCRATCH_STACK_ENTRY_COUNT, 0), VAR_NAME("_stack"));
dst->stack_entries = MAX_SCRATCH_STACK_ENTRY_COUNT;
} else {
dst->stack = NULL;
#undef VAR_NAME
static void
-lower_ray_query(nir_shader *shader, nir_variable *ray_query, struct hash_table *ht,
- uint32_t max_shared_size)
+lower_ray_query(nir_shader *shader, nir_variable *ray_query, struct hash_table *ht, uint32_t max_shared_size)
{
struct ray_query_vars *vars = ralloc(ht, struct ray_query_vars);
if (glsl_type_is_array(ray_query->type))
array_length = glsl_get_length(ray_query->type);
- init_ray_query_vars(shader, array_length, vars, ray_query->name == NULL ? "" : ray_query->name,
- max_shared_size);
+ init_ray_query_vars(shader, array_length, vars, ray_query->name == NULL ? "" : ray_query->name, max_shared_size);
_mesa_hash_table_insert(ht, ray_query, vars);
}
copy_candidate_to_closest(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars)
{
rq_copy_var(b, index, vars->closest.barycentrics, vars->candidate.barycentrics, 0x3);
- rq_copy_var(b, index, vars->closest.geometry_id_and_flags, vars->candidate.geometry_id_and_flags,
- 0x1);
+ rq_copy_var(b, index, vars->closest.geometry_id_and_flags, vars->candidate.geometry_id_and_flags, 0x1);
rq_copy_var(b, index, vars->closest.instance_addr, vars->candidate.instance_addr, 0x1);
rq_copy_var(b, index, vars->closest.intersection_type, vars->candidate.intersection_type, 0x1);
rq_copy_var(b, index, vars->closest.opaque, vars->candidate.opaque, 0x1);
rq_copy_var(b, index, vars->closest.frontface, vars->candidate.frontface, 0x1);
- rq_copy_var(b, index, vars->closest.sbt_offset_and_flags, vars->candidate.sbt_offset_and_flags,
- 0x1);
+ rq_copy_var(b, index, vars->closest.sbt_offset_and_flags, vars->candidate.sbt_offset_and_flags, 0x1);
rq_copy_var(b, index, vars->closest.primitive_id, vars->candidate.primitive_id, 0x1);
rq_copy_var(b, index, vars->closest.t, vars->candidate.t, 0x1);
}
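/* Illustrative sketch (not part of the patch): committing a candidate hit is
 * a whole-record copy into the "closest" slot, which is what the per-field
 * rq_copy_var calls in copy_candidate_to_closest do at the NIR level. The
 * plain-C struct below is hypothetical, for exposition only. */
#include <stdbool.h>
#include <stdint.h>

struct rq_intersection {
   float barycentrics[2];
   uint32_t geometry_id_and_flags;
   uint64_t instance_addr;
   uint32_t intersection_type;
   bool opaque;
   bool frontface;
   uint32_t sbt_offset_and_flags;
   uint32_t primitive_id;
   float t;
};

static void
commit_candidate(struct rq_intersection *closest, const struct rq_intersection *candidate)
{
   /* Equivalent to copying each field individually, as the NIR lowering does. */
   *closest = *candidate;
}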
if (ray_flags)
terminate_on_first_hit = ray_flags->terminate_on_first_hit;
else
- terminate_on_first_hit = nir_test_mask(b, rq_load_var(b, index, vars->flags),
- SpvRayFlagsTerminateOnFirstHitKHRMask);
+ terminate_on_first_hit =
+ nir_test_mask(b, rq_load_var(b, index, vars->flags), SpvRayFlagsTerminateOnFirstHitKHRMask);
nir_push_if(b, terminate_on_first_hit);
{
rq_store_var(b, index, vars->incomplete, nir_imm_false(b), 0x1);
nir_pop_if(b, NULL);
}
-enum rq_intersection_type {
- intersection_type_none,
- intersection_type_triangle,
- intersection_type_aabb
-};
+enum rq_intersection_type { intersection_type_none, intersection_type_triangle, intersection_type_aabb };
static void
-lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr,
- struct ray_query_vars *vars, struct radv_instance *instance)
+lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars,
+ struct radv_instance *instance)
{
rq_store_var(b, index, vars->flags, instr->src[2].ssa, 0x1);
rq_store_var(b, index, vars->cull_mask, nir_ishl_imm(b, instr->src[3].ssa, 24), 0x1);
rq_store_var(b, index, vars->trav.direction, instr->src[6].ssa, 0x7);
rq_store_var(b, index, vars->closest.t, instr->src[7].ssa, 0x1);
- rq_store_var(b, index, vars->closest.intersection_type, nir_imm_int(b, intersection_type_none),
- 0x1);
+ rq_store_var(b, index, vars->closest.intersection_type, nir_imm_int(b, intersection_type_none), 0x1);
nir_ssa_def *accel_struct = instr->src[1].ssa;
nir_ssa_def *bvh_offset = nir_build_load_global(
- b, 1, 32,
- nir_iadd_imm(b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)),
+ b, 1, 32, nir_iadd_imm(b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)),
.access = ACCESS_NON_WRITEABLE);
nir_ssa_def *bvh_base = nir_iadd(b, accel_struct, nir_u2u64(b, bvh_offset));
bvh_base = build_addr_to_node(b, bvh_base);
rq_store_var(b, index, vars->trav.stack, nir_imm_int(b, 0), 0x1);
rq_store_var(b, index, vars->trav.stack_low_watermark, nir_imm_int(b, 0), 0x1);
} else {
- nir_ssa_def *base_offset =
- nir_imul_imm(b, nir_load_local_invocation_index(b), sizeof(uint32_t));
+ nir_ssa_def *base_offset = nir_imul_imm(b, nir_load_local_invocation_index(b), sizeof(uint32_t));
base_offset = nir_iadd_imm(b, base_offset, vars->shared_base);
rq_store_var(b, index, vars->trav.stack, base_offset, 0x1);
rq_store_var(b, index, vars->trav.stack_low_watermark, base_offset, 0x1);
rq_store_var(b, index, vars->trav.current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE), 0x1);
rq_store_var(b, index, vars->trav.previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1);
rq_store_var(b, index, vars->trav.instance_top_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1);
- rq_store_var(b, index, vars->trav.instance_bottom_node,
- nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT), 0x1);
+ rq_store_var(b, index, vars->trav.instance_bottom_node, nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT), 0x1);
rq_store_var(b, index, vars->trav.top_stack, nir_imm_int(b, -1), 1);
- rq_store_var(b, index, vars->incomplete,
- nir_imm_bool(b, !(instance->debug_flags & RADV_DEBUG_NO_RT)), 0x1);
+ rq_store_var(b, index, vars->incomplete, nir_imm_bool(b, !(instance->debug_flags & RADV_DEBUG_NO_RT)), 0x1);
}
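/* Sketch of the 24+8 bit packing this lowering relies on: the cull mask is
 * shifted into the top 8 bits above (nir_ishl_imm(..., 24)), and ID fields
 * such as the geometry index and SBT offset are recovered below by masking
 * with 0xFFFFFF. Hypothetical plain-C helpers, assuming that layout. */
#include <stdint.h>

static inline uint32_t
pack_id24_high8(uint32_t id24, uint32_t high8)
{
   return (id24 & 0xFFFFFF) | (high8 << 24);
}

static inline uint32_t
unpack_id24(uint32_t packed)
{
   return packed & 0xFFFFFF;
}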
static nir_ssa_def *
-lower_rq_load(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr,
- struct ray_query_vars *vars)
+lower_rq_load(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars)
{
bool committed = nir_intrinsic_committed(instr);
struct ray_query_intersection_vars *intersection = committed ? &vars->closest : &vars->candidate;
return rq_load_var(b, index, intersection->barycentrics);
case nir_ray_query_value_intersection_candidate_aabb_opaque:
return nir_iand(b, rq_load_var(b, index, vars->candidate.opaque),
- nir_ieq_imm(b, rq_load_var(b, index, vars->candidate.intersection_type),
- intersection_type_aabb));
+ nir_ieq_imm(b, rq_load_var(b, index, vars->candidate.intersection_type), intersection_type_aabb));
case nir_ray_query_value_intersection_front_face:
return rq_load_var(b, index, intersection->frontface);
case nir_ray_query_value_intersection_geometry_index:
return nir_iand_imm(b, rq_load_var(b, index, intersection->geometry_id_and_flags), 0xFFFFFF);
case nir_ray_query_value_intersection_instance_custom_index: {
nir_ssa_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
- return nir_iand_imm(b,
- nir_build_load_global(b, 1, 32,
- nir_iadd_imm(b, instance_node_addr,
- offsetof(struct radv_bvh_instance_node,
- custom_instance_and_mask))),
- 0xFFFFFF);
+ return nir_iand_imm(
+ b,
+ nir_build_load_global(
+ b, 1, 32,
+ nir_iadd_imm(b, instance_node_addr, offsetof(struct radv_bvh_instance_node, custom_instance_and_mask))),
+ 0xFFFFFF);
}
case nir_ray_query_value_intersection_instance_id: {
nir_ssa_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
return nir_build_load_global(
- b, 1, 32,
- nir_iadd_imm(b, instance_node_addr, offsetof(struct radv_bvh_instance_node, instance_id)));
+ b, 1, 32, nir_iadd_imm(b, instance_node_addr, offsetof(struct radv_bvh_instance_node, instance_id)));
}
case nir_ray_query_value_intersection_instance_sbt_index:
return nir_iand_imm(b, rq_load_var(b, index, intersection->sbt_offset_and_flags), 0xFFFFFF);
for (unsigned r = 0; r < 3; ++r)
rows[r] = nir_build_load_global(
b, 4, 32,
- nir_iadd_imm(b, instance_node_addr,
- offsetof(struct radv_bvh_instance_node, otw_matrix) + r * 16));
+ nir_iadd_imm(b, instance_node_addr, offsetof(struct radv_bvh_instance_node, otw_matrix) + r * 16));
return nir_vec3(b, nir_channel(b, rows[0], column), nir_channel(b, rows[1], column),
nir_channel(b, rows[2], column));
nir_ssa_def *index = data->index;
rq_store_var(b, index, vars->candidate.primitive_id, intersection->primitive_id, 1);
- rq_store_var(b, index, vars->candidate.geometry_id_and_flags,
- intersection->geometry_id_and_flags, 1);
+ rq_store_var(b, index, vars->candidate.geometry_id_and_flags, intersection->geometry_id_and_flags, 1);
rq_store_var(b, index, vars->candidate.opaque, intersection->opaque, 0x1);
- rq_store_var(b, index, vars->candidate.intersection_type, nir_imm_int(b, intersection_type_aabb),
- 0x1);
+ rq_store_var(b, index, vars->candidate.intersection_type, nir_imm_int(b, intersection_type_aabb), 0x1);
nir_jump(b, nir_jump_break);
}
static void
handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *intersection,
- const struct radv_ray_traversal_args *args,
- const struct radv_ray_flags *ray_flags)
+ const struct radv_ray_traversal_args *args, const struct radv_ray_flags *ray_flags)
{
struct traversal_data *data = args->data;
struct ray_query_vars *vars = data->vars;
rq_store_var(b, index, vars->candidate.barycentrics, intersection->barycentrics, 3);
rq_store_var(b, index, vars->candidate.primitive_id, intersection->base.primitive_id, 1);
- rq_store_var(b, index, vars->candidate.geometry_id_and_flags,
- intersection->base.geometry_id_and_flags, 1);
+ rq_store_var(b, index, vars->candidate.geometry_id_and_flags, intersection->base.geometry_id_and_flags, 1);
rq_store_var(b, index, vars->candidate.t, intersection->t, 0x1);
rq_store_var(b, index, vars->candidate.opaque, intersection->base.opaque, 0x1);
rq_store_var(b, index, vars->candidate.frontface, intersection->frontface, 0x1);
- rq_store_var(b, index, vars->candidate.intersection_type,
- nir_imm_int(b, intersection_type_triangle), 0x1);
+ rq_store_var(b, index, vars->candidate.intersection_type, nir_imm_int(b, intersection_type_triangle), 0x1);
nir_push_if(b, intersection->base.opaque);
{
}
static void
-store_stack_entry(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value,
- const struct radv_ray_traversal_args *args)
+store_stack_entry(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value, const struct radv_ray_traversal_args *args)
{
struct traversal_data *data = args->data;
if (data->vars->stack)
}
static nir_ssa_def *
-lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars,
- struct radv_device *device)
+lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars, struct radv_device *device)
{
- nir_variable *inv_dir =
- nir_local_variable_create(b->impl, glsl_vector_type(GLSL_TYPE_FLOAT, 3), "inv_dir");
+ nir_variable *inv_dir = nir_local_variable_create(b->impl, glsl_vector_type(GLSL_TYPE_FLOAT, 3), "inv_dir");
nir_store_var(b, inv_dir, nir_frcp(b, rq_load_var(b, index, vars->trav.direction)), 0x7);
struct radv_ray_traversal_vars trav_vars = {
args.stack_stride = 1;
args.stack_base = 0;
} else {
- uint32_t workgroup_size = b->shader->info.workgroup_size[0] *
- b->shader->info.workgroup_size[1] *
- b->shader->info.workgroup_size[2];
+ uint32_t workgroup_size =
+ b->shader->info.workgroup_size[0] * b->shader->info.workgroup_size[1] * b->shader->info.workgroup_size[2];
args.stack_stride = workgroup_size * 4;
args.stack_base = vars->shared_base;
}
nir_push_if(b, rq_load_var(b, index, vars->incomplete));
{
nir_ssa_def *incomplete = radv_build_ray_traversal(device, b, &args);
- rq_store_var(b, index, vars->incomplete,
- nir_iand(b, rq_load_var(b, index, vars->incomplete), incomplete), 1);
+ rq_store_var(b, index, vars->incomplete, nir_iand(b, rq_load_var(b, index, vars->incomplete), incomplete), 1);
}
nir_pop_if(b, NULL);
}
static void
-lower_rq_terminate(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr,
- struct ray_query_vars *vars)
+lower_rq_terminate(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars)
{
rq_store_var(b, index, vars->incomplete, nir_imm_false(b), 0x1);
}
if (!nir_intrinsic_is_ray_query(intrinsic->intrinsic))
continue;
- nir_deref_instr *ray_query_deref =
- nir_instr_as_deref(intrinsic->src[0].ssa->parent_instr);
+ nir_deref_instr *ray_query_deref = nir_instr_as_deref(intrinsic->src[0].ssa->parent_instr);
nir_ssa_def *index = NULL;
if (ray_query_deref->deref_type == nir_deref_type_array) {
assert(ray_query_deref->deref_type == nir_deref_type_var);
struct ray_query_vars *vars =
- (struct ray_query_vars *)_mesa_hash_table_search(query_ht, ray_query_deref->var)
- ->data;
+ (struct ray_query_vars *)_mesa_hash_table_search(query_ht, ray_query_deref->var)->data;
builder.cursor = nir_before_instr(instr);
} lower_vs_inputs_state;
static nir_ssa_def *
-lower_load_vs_input_from_prolog(nir_builder *b, nir_intrinsic_instr *intrin,
- lower_vs_inputs_state *s)
+lower_load_vs_input_from_prolog(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs_state *s)
{
nir_src *offset_src = nir_get_io_offset_src(intrin);
assert(nir_src_is_const(*offset_src));
const unsigned arg_bit_size = MAX2(bit_size, 32);
unsigned num_input_args = 1;
- nir_ssa_def *input_args[2] = {
- ac_nir_load_arg(b, &s->args->ac, s->args->vs_inputs[driver_location]), NULL};
+ nir_ssa_def *input_args[2] = {ac_nir_load_arg(b, &s->args->ac, s->args->vs_inputs[driver_location]), NULL};
if (component * 32 + arg_bit_size * num_components > 128) {
assert(bit_size == 64);
}
static nir_ssa_def *
-oob_input_load_value(nir_builder *b, const unsigned channel_idx, const unsigned bit_size,
- const bool is_float)
+oob_input_load_value(nir_builder *b, const unsigned channel_idx, const unsigned bit_size, const bool is_float)
{
/* 22.1.1. Attribute Location and Component Assignment of Vulkan 1.3 specification:
* For 64-bit data types, no default attribute values are provided. Input variables
}
static unsigned
-count_format_bytes(const struct util_format_description *f, const unsigned first_channel,
- const unsigned num_channels)
+count_format_bytes(const struct util_format_description *f, const unsigned first_channel, const unsigned num_channels)
{
if (!num_channels)
return 0;
}
static unsigned
-first_used_swizzled_channel(const struct util_format_description *f, const unsigned mask,
- const bool backwards)
+first_used_swizzled_channel(const struct util_format_description *f, const unsigned mask, const bool backwards)
{
unsigned first_used = backwards ? 0 : f->nr_channels;
const unsigned it_mask = mask & BITFIELD_MASK(f->nr_channels);
}
static nir_ssa_def *
-adjust_vertex_fetch_alpha(nir_builder *b, enum ac_vs_input_alpha_adjust alpha_adjust,
- nir_ssa_def *alpha)
+adjust_vertex_fetch_alpha(nir_builder *b, enum ac_vs_input_alpha_adjust alpha_adjust, nir_ssa_def *alpha)
{
if (alpha_adjust == AC_ALPHA_ADJUST_SSCALED)
alpha = nir_f2u32(b, alpha);
const struct util_format_description *f = util_format_description(attrib_format);
const struct ac_vtx_format_info *vtx_info =
ac_get_vtx_format_info(s->rad_info->gfx_level, s->rad_info->family, attrib_format);
- const unsigned binding_index =
- s->info->vs.use_per_attribute_vb_descs ? location : attrib_binding;
- const unsigned desc_index =
- util_bitcount(s->info->vs.vb_desc_usage_mask & u_bit_consecutive(0, binding_index));
+ const unsigned binding_index = s->info->vs.use_per_attribute_vb_descs ? location : attrib_binding;
+ const unsigned desc_index = util_bitcount(s->info->vs.vb_desc_usage_mask & u_bit_consecutive(0, binding_index));
nir_ssa_def *vertex_buffers_arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.vertex_buffers);
nir_ssa_def *vertex_buffers =
nir_pack_64_2x32_split(b, vertex_buffers_arg, nir_imm_int(b, s->rad_info->address32_hi));
- nir_ssa_def *descriptor =
- nir_load_smem_amd(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16));
+ nir_ssa_def *descriptor = nir_load_smem_amd(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16));
nir_ssa_def *base_index = calc_vs_input_index(b, location, s);
nir_ssa_def *zero = nir_imm_int(b, 0);
* Don't shrink the format here because this might allow the backend to
* emit fewer (but larger than needed) HW instructions.
*/
- const unsigned first_trailing_unused_channel =
- first_used_swizzled_channel(f, dest_use_mask, true) + 1;
+ const unsigned first_trailing_unused_channel = first_used_swizzled_channel(f, dest_use_mask, true) + 1;
const unsigned max_loaded_channels = MIN2(first_trailing_unused_channel, f->nr_channels);
const unsigned fetch_num_channels =
first_used_channel >= max_loaded_channels ? 0 : max_loaded_channels - skipped_start;
* Note, NONE seems to occur in real use and is considered an array format.
*/
if (f->is_array && fetch_format != PIPE_FORMAT_NONE) {
- while (channels > 1 && attrib_stride &&
- (const_off + count_format_bytes(f, start, channels)) > attrib_stride) {
+ while (channels > 1 && attrib_stride && (const_off + count_format_bytes(f, start, channels)) > attrib_stride) {
channels--;
}
/* Keep the fetch format as large as possible to let the backend emit
* larger load instructions when it deems them beneficial.
*/
- fetch_format =
- util_format_get_array(f->channel[0].type, f->channel[0].size, f->nr_channels - start,
- f->is_unorm || f->is_snorm, f->channel[0].pure_integer);
+ fetch_format = util_format_get_array(f->channel[0].type, f->channel[0].size, f->nr_channels - start,
+ f->is_unorm || f->is_snorm, f->channel[0].pure_integer);
}
assert(f->is_array || channels == fetch_num_channels);
* Typed loads can cause GPU hangs when used with improper alignment.
*/
if (can_use_untyped_load(f, bit_size)) {
- loads[num_loads++] =
- nir_load_buffer_amd(b, channels, bit_size, descriptor, zero, zero, index,
- .base = const_off, .memory_modes = nir_var_shader_in);
+ loads[num_loads++] = nir_load_buffer_amd(b, channels, bit_size, descriptor, zero, zero, index,
+ .base = const_off, .memory_modes = nir_var_shader_in);
} else {
const unsigned align_mul = MAX2(1, s->pl_key->vs.vertex_binding_align[attrib_binding]);
const unsigned align_offset = const_off % align_mul;
loads[num_loads++] = nir_load_typed_buffer_amd(
- b, channels, bit_size, descriptor, zero, zero, index, .base = const_off,
- .format = fetch_format, .align_mul = align_mul, .align_offset = align_offset,
- .memory_modes = nir_var_shader_in);
+ b, channels, bit_size, descriptor, zero, zero, index, .base = const_off, .format = fetch_format,
+ .align_mul = align_mul, .align_offset = align_offset, .memory_modes = nir_var_shader_in);
}
}
max_loaded_channels - first_used_channel, bit_size);
/* Return early if possible to avoid generating unnecessary IR. */
- if (num_loads > 0 && first_used_channel == component &&
- load->num_components == dest_num_components && !needs_swizzle &&
- alpha_adjust == AC_ALPHA_ADJUST_NONE)
+ if (num_loads > 0 && first_used_channel == component && load->num_components == dest_num_components &&
+ !needs_swizzle && alpha_adjust == AC_ALPHA_ADJUST_NONE)
return load;
/* Fill unused and OOB components.
.rad_info = rad_info,
};
- return nir_shader_instructions_pass(shader, lower_vs_input_instr,
- nir_metadata_dominance | nir_metadata_block_index, &state);
+ return nir_shader_instructions_pass(shader, lower_vs_input_instr, nir_metadata_dominance | nir_metadata_block_index,
+ &state);
}
#include "radv_private.h"
+#include "meta/radv_meta.h"
#include "nir_builder.h"
#include "radv_cs.h"
-#include "meta/radv_meta.h"
#include "radix_sort/radv_radix_sort.h"
}
if (accel_struct) {
- uint64_t bvh_size =
- bvh_leaf_size * leaf_count + sizeof(struct radv_bvh_box32_node) * internal_count;
+ uint64_t bvh_size = bvh_leaf_size * leaf_count + sizeof(struct radv_bvh_box32_node) * internal_count;
uint32_t offset = 0;
offset += sizeof(struct radv_accel_struct_header);
0,
};
if (radv_device_init_accel_struct_build_state(device) == VK_SUCCESS)
- radix_sort_vk_get_memory_requirements(device->meta_state.accel_struct_build.radix_sort,
- leaf_count, &requirements);
+ radix_sort_vk_get_memory_requirements(device->meta_state.accel_struct_build.radix_sort, leaf_count,
+ &requirements);
uint32_t offset = 0;
struct build_config config = build_config(leaf_count, build_info);
if (config.internal_type == INTERNAL_BUILD_TYPE_PLOC)
- ploc_scratch_space = DIV_ROUND_UP(leaf_count, PLOC_WORKGROUP_SIZE) *
- sizeof(struct ploc_prefix_scan_partition);
+ ploc_scratch_space = DIV_ROUND_UP(leaf_count, PLOC_WORKGROUP_SIZE) * sizeof(struct ploc_prefix_scan_partition);
else
lbvh_node_space = sizeof(struct lbvh_node_info) * internal_count;
}
VKAPI_ATTR void VKAPI_CALL
-radv_GetAccelerationStructureBuildSizesKHR(
- VkDevice _device, VkAccelerationStructureBuildTypeKHR buildType,
- const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
- const uint32_t *pMaxPrimitiveCounts, VkAccelerationStructureBuildSizesInfoKHR *pSizeInfo)
+radv_GetAccelerationStructureBuildSizesKHR(VkDevice _device, VkAccelerationStructureBuildTypeKHR buildType,
+ const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
+ const uint32_t *pMaxPrimitiveCounts,
+ VkAccelerationStructureBuildSizesInfoKHR *pSizeInfo)
{
RADV_FROM_HANDLE(radv_device, device, _device);
}
VKAPI_ATTR VkResult VKAPI_CALL
-radv_WriteAccelerationStructuresPropertiesKHR(
- VkDevice _device, uint32_t accelerationStructureCount,
- const VkAccelerationStructureKHR *pAccelerationStructures, VkQueryType queryType,
- size_t dataSize, void *pData, size_t stride)
+radv_WriteAccelerationStructuresPropertiesKHR(VkDevice _device, uint32_t accelerationStructureCount,
+ const VkAccelerationStructureKHR *pAccelerationStructures,
+ VkQueryType queryType, size_t dataSize, void *pData, size_t stride)
{
unreachable("Unimplemented");
return VK_ERROR_FEATURE_NOT_PRESENT;
}
VKAPI_ATTR VkResult VKAPI_CALL
-radv_BuildAccelerationStructuresKHR(
- VkDevice _device, VkDeferredOperationKHR deferredOperation, uint32_t infoCount,
- const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
- const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
+radv_BuildAccelerationStructuresKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation, uint32_t infoCount,
+ const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
+ const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
{
unreachable("Unimplemented");
return VK_ERROR_FEATURE_NOT_PRESENT;
radv_device_finish_accel_struct_build_state(struct radv_device *device)
{
struct radv_meta_state *state = &device->meta_state;
- radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.copy_pipeline,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.ploc_pipeline,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->accel_struct_build.ploc_extended_pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->accel_struct_build.lbvh_generate_ir_pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.lbvh_main_pipeline,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.leaf_pipeline,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.encode_pipeline,
+ radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.copy_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.ploc_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.ploc_extended_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.lbvh_generate_ir_pipeline,
&state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->accel_struct_build.encode_compact_pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.header_pipeline,
+ radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.lbvh_main_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.leaf_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.encode_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.encode_compact_pipeline,
&state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.morton_pipeline,
- &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->accel_struct_build.copy_p_layout, &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->accel_struct_build.ploc_p_layout, &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->accel_struct_build.lbvh_generate_ir_p_layout, &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->accel_struct_build.lbvh_main_p_layout, &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->accel_struct_build.leaf_p_layout, &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->accel_struct_build.encode_p_layout, &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->accel_struct_build.header_p_layout, &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->accel_struct_build.morton_p_layout, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.header_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.morton_pipeline, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.copy_p_layout, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.ploc_p_layout, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.lbvh_generate_ir_p_layout,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.lbvh_main_p_layout,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.leaf_p_layout, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.encode_p_layout, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.header_p_layout, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.morton_p_layout, &state->alloc);
if (state->accel_struct_build.radix_sort)
- radix_sort_vk_destroy(state->accel_struct_build.radix_sort, radv_device_to_handle(device),
- &state->alloc);
-
- radv_DestroyBuffer(radv_device_to_handle(device), state->accel_struct_build.null.buffer,
- &state->alloc);
- radv_FreeMemory(radv_device_to_handle(device), state->accel_struct_build.null.memory,
- &state->alloc);
- vk_common_DestroyAccelerationStructureKHR(
- radv_device_to_handle(device), state->accel_struct_build.null.accel_struct, &state->alloc);
+ radix_sort_vk_destroy(state->accel_struct_build.radix_sort, radv_device_to_handle(device), &state->alloc);
+
+ radv_DestroyBuffer(radv_device_to_handle(device), state->accel_struct_build.null.buffer, &state->alloc);
+ radv_FreeMemory(radv_device_to_handle(device), state->accel_struct_build.null.memory, &state->alloc);
+ vk_common_DestroyAccelerationStructureKHR(radv_device_to_handle(device), state->accel_struct_build.null.accel_struct,
+ &state->alloc);
}
static VkResult
create_build_pipeline_spv(struct radv_device *device, const uint32_t *spv, uint32_t spv_size,
- unsigned push_constant_size, VkPipeline *pipeline,
- VkPipelineLayout *layout)
+ unsigned push_constant_size, VkPipeline *pipeline, VkPipelineLayout *layout)
{
if (*pipeline)
return VK_SUCCESS;
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 0,
.pushConstantRangeCount = 1,
- .pPushConstantRanges =
- &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constant_size},
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constant_size},
};
VkShaderModuleCreateInfo module_info = {
};
VkShaderModule module;
- VkResult result = device->vk.dispatch_table.CreateShaderModule(
- radv_device_to_handle(device), &module_info, &device->meta_state.alloc, &module);
+ VkResult result = device->vk.dispatch_table.CreateShaderModule(radv_device_to_handle(device), &module_info,
+ &device->meta_state.alloc, &module);
if (result != VK_SUCCESS)
return result;
if (!*layout) {
- result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
- &device->meta_state.alloc, layout);
+ result =
+ radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, layout);
if (result != VK_SUCCESS)
goto cleanup;
}
.layout = *layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &pipeline_info, &device->meta_state.alloc, pipeline);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &pipeline_info,
+ &device->meta_state.alloc, pipeline);
cleanup:
- device->vk.dispatch_table.DestroyShaderModule(radv_device_to_handle(device), module,
- &device->meta_state.alloc);
+ device->vk.dispatch_table.DestroyShaderModule(radv_device_to_handle(device), module, &device->meta_state.alloc);
return result;
}
static void
-radix_sort_fill_buffer(VkCommandBuffer commandBuffer,
- radix_sort_vk_buffer_info_t const *buffer_info, VkDeviceSize offset,
- VkDeviceSize size, uint32_t data)
+radix_sort_fill_buffer(VkCommandBuffer commandBuffer, radix_sort_vk_buffer_info_t const *buffer_info,
+ VkDeviceSize offset, VkDeviceSize size, uint32_t data)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
assert(size != VK_WHOLE_SIZE);
- radv_fill_buffer(cmd_buffer, NULL, NULL, buffer_info->devaddr + buffer_info->offset + offset,
- size, data);
+ radv_fill_buffer(cmd_buffer, NULL, NULL, buffer_info->devaddr + buffer_info->offset + offset, size, data);
}
VkResult
VkMemoryAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.allocationSize = mem_req.memoryRequirements.size,
- .memoryTypeIndex =
- radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
- VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
+ .memoryTypeIndex = radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
};
result = radv_AllocateMemory(_device, &alloc_info, &device->meta_state.alloc, &memory);
.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,
};
- result = vk_common_CreateAccelerationStructureKHR(_device, &create_info,
- &device->meta_state.alloc, &accel_struct);
+ result = vk_common_CreateAccelerationStructureKHR(_device, &create_info, &device->meta_state.alloc, &accel_struct);
if (result != VK_SUCCESS)
return result;
if (result != VK_SUCCESS)
goto exit;
- result = create_build_pipeline_spv(device, lbvh_main_spv, sizeof(lbvh_main_spv),
- sizeof(struct lbvh_main_args),
+ result = create_build_pipeline_spv(device, lbvh_main_spv, sizeof(lbvh_main_spv), sizeof(struct lbvh_main_args),
&device->meta_state.accel_struct_build.lbvh_main_pipeline,
&device->meta_state.accel_struct_build.lbvh_main_p_layout);
if (result != VK_SUCCESS)
goto exit;
- result =
- create_build_pipeline_spv(device, lbvh_generate_ir_spv, sizeof(lbvh_generate_ir_spv),
- sizeof(struct lbvh_generate_ir_args),
- &device->meta_state.accel_struct_build.lbvh_generate_ir_pipeline,
- &device->meta_state.accel_struct_build.lbvh_generate_ir_p_layout);
+ result = create_build_pipeline_spv(device, lbvh_generate_ir_spv, sizeof(lbvh_generate_ir_spv),
+ sizeof(struct lbvh_generate_ir_args),
+ &device->meta_state.accel_struct_build.lbvh_generate_ir_pipeline,
+ &device->meta_state.accel_struct_build.lbvh_generate_ir_p_layout);
if (result != VK_SUCCESS)
goto exit;
if (result != VK_SUCCESS)
goto exit;
- result = create_build_pipeline_spv(device, ploc_extended_spv, sizeof(ploc_extended_spv),
- sizeof(struct ploc_args),
+ result = create_build_pipeline_spv(device, ploc_extended_spv, sizeof(ploc_extended_spv), sizeof(struct ploc_args),
&device->meta_state.accel_struct_build.ploc_extended_pipeline,
&device->meta_state.accel_struct_build.ploc_p_layout);
if (result != VK_SUCCESS)
goto exit;
- result =
- create_build_pipeline_spv(device, encode_spv, sizeof(encode_spv), sizeof(struct encode_args),
- &device->meta_state.accel_struct_build.encode_pipeline,
- &device->meta_state.accel_struct_build.encode_p_layout);
+ result = create_build_pipeline_spv(device, encode_spv, sizeof(encode_spv), sizeof(struct encode_args),
+ &device->meta_state.accel_struct_build.encode_pipeline,
+ &device->meta_state.accel_struct_build.encode_p_layout);
if (result != VK_SUCCESS)
goto exit;
- result = create_build_pipeline_spv(
- device, encode_compact_spv, sizeof(encode_compact_spv), sizeof(struct encode_args),
- &device->meta_state.accel_struct_build.encode_compact_pipeline,
- &device->meta_state.accel_struct_build.encode_p_layout);
+ result =
+ create_build_pipeline_spv(device, encode_compact_spv, sizeof(encode_compact_spv), sizeof(struct encode_args),
+ &device->meta_state.accel_struct_build.encode_compact_pipeline,
+ &device->meta_state.accel_struct_build.encode_p_layout);
if (result != VK_SUCCESS)
goto exit;
- result =
- create_build_pipeline_spv(device, header_spv, sizeof(header_spv), sizeof(struct header_args),
- &device->meta_state.accel_struct_build.header_pipeline,
- &device->meta_state.accel_struct_build.header_p_layout);
+ result = create_build_pipeline_spv(device, header_spv, sizeof(header_spv), sizeof(struct header_args),
+ &device->meta_state.accel_struct_build.header_pipeline,
+ &device->meta_state.accel_struct_build.header_p_layout);
if (result != VK_SUCCESS)
goto exit;
- result =
- create_build_pipeline_spv(device, morton_spv, sizeof(morton_spv), sizeof(struct morton_args),
- &device->meta_state.accel_struct_build.morton_pipeline,
- &device->meta_state.accel_struct_build.morton_p_layout);
+ result = create_build_pipeline_spv(device, morton_spv, sizeof(morton_spv), sizeof(struct morton_args),
+ &device->meta_state.accel_struct_build.morton_pipeline,
+ &device->meta_state.accel_struct_build.morton_p_layout);
if (result != VK_SUCCESS)
goto exit;
device->meta_state.accel_struct_build.radix_sort =
- radv_create_radix_sort_u64(radv_device_to_handle(device), &device->meta_state.alloc,
- device->meta_state.cache);
+ radv_create_radix_sort_u64(radv_device_to_handle(device), &device->meta_state.alloc, device->meta_state.cache);
- struct radix_sort_vk_sort_devaddr_info *radix_sort_info =
- &device->meta_state.accel_struct_build.radix_sort_info;
+ struct radix_sort_vk_sort_devaddr_info *radix_sort_info = &device->meta_state.accel_struct_build.radix_sort_info;
radix_sort_info->ext = NULL;
radix_sort_info->key_bits = 24;
radix_sort_info->fill_buffer = radix_sort_fill_buffer;
{
mtx_lock(&device->meta_state.mtx);
- VkResult result =
- create_build_pipeline_spv(device, copy_spv, sizeof(copy_spv), sizeof(struct copy_args),
- &device->meta_state.accel_struct_build.copy_pipeline,
- &device->meta_state.accel_struct_build.copy_p_layout);
+ VkResult result = create_build_pipeline_spv(device, copy_spv, sizeof(copy_spv), sizeof(struct copy_args),
+ &device->meta_state.accel_struct_build.copy_pipeline,
+ &device->meta_state.accel_struct_build.copy_p_layout);
mtx_unlock(&device->meta_state.mtx);
return result;
static void
build_leaves(VkCommandBuffer commandBuffer, uint32_t infoCount,
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
- const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos,
- struct bvh_state *bvh_states, enum radv_cmd_flush_bits flush_bits)
+ const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos, struct bvh_state *bvh_states,
+ enum radv_cmd_flush_bits flush_bits)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
case VK_GEOMETRY_TYPE_AABBS_KHR:
assert(pInfos[i].type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
- leaf_consts.data =
- geom->geometry.aabbs.data.deviceAddress + buildRangeInfo->primitiveOffset;
+ leaf_consts.data = geom->geometry.aabbs.data.deviceAddress + buildRangeInfo->primitiveOffset;
leaf_consts.stride = geom->geometry.aabbs.stride;
prim_size = sizeof(struct radv_ir_aabb_node);
case VK_GEOMETRY_TYPE_INSTANCES_KHR:
assert(pInfos[i].type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR);
- leaf_consts.data =
- geom->geometry.instances.data.deviceAddress + buildRangeInfo->primitiveOffset;
+ leaf_consts.data = geom->geometry.instances.data.deviceAddress + buildRangeInfo->primitiveOffset;
if (geom->geometry.instances.arrayOfPointers)
leaf_consts.stride = 8;
unreachable("Unknown geometryType");
}
- radv_CmdPushConstants(commandBuffer,
- cmd_buffer->device->meta_state.accel_struct_build.leaf_p_layout,
+ radv_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.leaf_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(leaf_consts), &leaf_consts);
radv_unaligned_dispatch(cmd_buffer, buildRangeInfo->primitiveCount, 1, 1);
static void
morton_generate(VkCommandBuffer commandBuffer, uint32_t infoCount,
- const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
- struct bvh_state *bvh_states, enum radv_cmd_flush_bits flush_bits)
+ const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
+ enum radv_cmd_flush_bits flush_bits)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
.ids = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0],
};
- radv_CmdPushConstants(commandBuffer,
- cmd_buffer->device->meta_state.accel_struct_build.morton_p_layout,
+ radv_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.morton_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
radv_unaligned_dispatch(cmd_buffer, bvh_states[i].node_count, 1, 1);
}
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
for (uint32_t i = 0; i < infoCount; ++i) {
struct radix_sort_vk_memory_requirements requirements;
- radix_sort_vk_get_memory_requirements(
- cmd_buffer->device->meta_state.accel_struct_build.radix_sort, bvh_states[i].node_count,
- &requirements);
+ radix_sort_vk_get_memory_requirements(cmd_buffer->device->meta_state.accel_struct_build.radix_sort,
+ bvh_states[i].node_count, &requirements);
- struct radix_sort_vk_sort_devaddr_info info =
- cmd_buffer->device->meta_state.accel_struct_build.radix_sort_info;
+ struct radix_sort_vk_sort_devaddr_info info = cmd_buffer->device->meta_state.accel_struct_build.radix_sort_info;
info.count = bvh_states[i].node_count;
info.keyvals_even.buffer = VK_NULL_HANDLE;
info.keyvals_even.offset = 0;
- info.keyvals_even.devaddr =
- pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
+ info.keyvals_even.devaddr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
- info.keyvals_odd =
- pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[1];
+ info.keyvals_odd = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[1];
info.internal.buffer = VK_NULL_HANDLE;
info.internal.offset = 0;
- info.internal.devaddr =
- pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
+ info.internal.devaddr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
VkDeviceAddress result_addr;
- radix_sort_vk_sort_devaddr(cmd_buffer->device->meta_state.accel_struct_build.radix_sort,
- &info, radv_device_to_handle(cmd_buffer->device), commandBuffer,
- &result_addr);
+ radix_sort_vk_sort_devaddr(cmd_buffer->device->meta_state.accel_struct_build.radix_sort, &info,
+ radv_device_to_handle(cmd_buffer->device), commandBuffer, &result_addr);
assert(result_addr == info.keyvals_even.devaddr || result_addr == info.keyvals_odd);
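/* Sketch of why the assert above accepts either address: a radix sort
 * ping-pongs keyvals between an "even" and an "odd" buffer, one pass per
 * digit, so the result lands wherever the last pass wrote. Minimal
 * counting-sort version, assuming 8-bit digits and the 24 key bits configured
 * for radix_sort_info above; not the vendored radix_sort_vk implementation. */
#include <stdint.h>

static uint32_t *
radix_sort_24(uint32_t *buf[2], uint32_t n)
{
   unsigned src = 0;
   for (unsigned pass = 0; pass < 3; ++pass) {
      uint32_t count[257] = {0};
      for (uint32_t i = 0; i < n; ++i)
         count[((buf[src][i] >> (8 * pass)) & 0xff) + 1]++;
      for (unsigned d = 1; d <= 256; ++d)
         count[d] += count[d - 1];
      for (uint32_t i = 0; i < n; ++i)
         buf[1 - src][count[(buf[src][i] >> (8 * pass)) & 0xff]++] = buf[src][i];
      src = 1 - src; /* next pass reads what this pass wrote */
   }
   return buf[src]; /* three passes: the sorted data ends in the "odd" buffer */
}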
static void
lbvh_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount,
- const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
- struct bvh_state *bvh_states, enum radv_cmd_flush_bits flush_bits)
+ const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
+ enum radv_cmd_flush_bits flush_bits)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
.internal_node_base = bvh_states[i].internal_node_base,
};
- radv_CmdPushConstants(commandBuffer,
- cmd_buffer->device->meta_state.accel_struct_build.lbvh_main_p_layout,
+ radv_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.lbvh_main_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
radv_unaligned_dispatch(cmd_buffer, internal_node_count, 1, 1);
bvh_states[i].node_count = internal_node_count;
cmd_buffer->state.flush_bits |= flush_bits;
- radv_CmdBindPipeline(
- commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
- cmd_buffer->device->meta_state.accel_struct_build.lbvh_generate_ir_pipeline);
+ radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+ cmd_buffer->device->meta_state.accel_struct_build.lbvh_generate_ir_pipeline);
for (uint32_t i = 0; i < infoCount; ++i) {
if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_LBVH)
.internal_node_base = bvh_states[i].internal_node_base,
};
- radv_CmdPushConstants(
- commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.lbvh_generate_ir_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
+ radv_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.lbvh_generate_ir_p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
radv_unaligned_dispatch(cmd_buffer, bvh_states[i].internal_node_count, 1, 1);
}
}
static void
ploc_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount,
- const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
- struct bvh_state *bvh_states, bool extended_sah)
+ const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
+ bool extended_sah)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_CmdBindPipeline(
- commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
- extended_sah ? cmd_buffer->device->meta_state.accel_struct_build.ploc_extended_pipeline
- : cmd_buffer->device->meta_state.accel_struct_build.ploc_pipeline);
+ radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+ extended_sah ? cmd_buffer->device->meta_state.accel_struct_build.ploc_extended_pipeline
+ : cmd_buffer->device->meta_state.accel_struct_build.ploc_pipeline);
for (uint32_t i = 0; i < infoCount; ++i) {
if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_PLOC)
.task_counts = {TASK_INDEX_INVALID, TASK_INDEX_INVALID},
};
radv_update_buffer_cp(cmd_buffer,
- pInfos[i].scratchData.deviceAddress +
- bvh_states[i].scratch.header_offset +
+ pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset +
offsetof(struct radv_ir_header, sync_data),
&initial_sync_data, sizeof(struct radv_global_sync_data));
uint32_t src_scratch_offset = bvh_states[i].scratch_offset;
- uint32_t dst_scratch_offset =
- (src_scratch_offset == bvh_states[i].scratch.sort_buffer_offset[0])
- ? bvh_states[i].scratch.sort_buffer_offset[1]
- : bvh_states[i].scratch.sort_buffer_offset[0];
+ uint32_t dst_scratch_offset = (src_scratch_offset == bvh_states[i].scratch.sort_buffer_offset[0])
+ ? bvh_states[i].scratch.sort_buffer_offset[1]
+ : bvh_states[i].scratch.sort_buffer_offset[0];
const struct ploc_args consts = {
.bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
.header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
.ids_0 = pInfos[i].scratchData.deviceAddress + src_scratch_offset,
.ids_1 = pInfos[i].scratchData.deviceAddress + dst_scratch_offset,
- .prefix_scan_partitions = pInfos[i].scratchData.deviceAddress +
- bvh_states[i].scratch.ploc_prefix_sum_partition_offset,
+ .prefix_scan_partitions =
+ pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ploc_prefix_sum_partition_offset,
.internal_node_offset = bvh_states[i].internal_node_base,
};
- radv_CmdPushConstants(commandBuffer,
- cmd_buffer->device->meta_state.accel_struct_build.ploc_p_layout,
+ radv_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.ploc_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
- vk_common_CmdDispatch(commandBuffer,
- MAX2(DIV_ROUND_UP(bvh_states[i].node_count, PLOC_WORKGROUP_SIZE), 1), 1, 1);
+ vk_common_CmdDispatch(commandBuffer, MAX2(DIV_ROUND_UP(bvh_states[i].node_count, PLOC_WORKGROUP_SIZE), 1), 1, 1);
}
}
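/* Worked example for the PLOC dispatch sizing above, assuming Mesa's usual
 * ceiling-division and max macros; the workgroup size used here is a
 * hypothetical stand-in for PLOC_WORKGROUP_SIZE. */
#define DIV_ROUND_UP(a, b) (((a) + (b) - 1) / (b))
#define MAX2(a, b)         ((a) > (b) ? (a) : (b))
/* With a workgroup size of 1024: 1025 nodes -> DIV_ROUND_UP == 2 workgroups;
 * 0 nodes -> MAX2(0, 1) == 1, so at least one workgroup always runs. */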
static void
encode_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount,
- const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
- struct bvh_state *bvh_states, bool compact)
+ const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states, bool compact)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_CmdBindPipeline(
- commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
- compact ? cmd_buffer->device->meta_state.accel_struct_build.encode_compact_pipeline
- : cmd_buffer->device->meta_state.accel_struct_build.encode_pipeline);
+ radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+ compact ? cmd_buffer->device->meta_state.accel_struct_build.encode_compact_pipeline
+ : cmd_buffer->device->meta_state.accel_struct_build.encode_pipeline);
for (uint32_t i = 0; i < infoCount; ++i) {
if (compact != bvh_states[i].config.compact)
* because it will be multiplied by 0.
*/
if (pInfos[i].geometryCount)
- geometry_type = pInfos[i].pGeometries ? pInfos[i].pGeometries[0].geometryType
- : pInfos[i].ppGeometries[0]->geometryType;
+ geometry_type =
+ pInfos[i].pGeometries ? pInfos[i].pGeometries[0].geometryType : pInfos[i].ppGeometries[0]->geometryType;
if (bvh_states[i].config.compact) {
uint32_t leaf_node_size = 0;
unreachable("");
}
- uint32_t dst_offset =
- sizeof(struct radv_bvh_box32_node) + bvh_states[i].leaf_node_count * leaf_node_size;
+ uint32_t dst_offset = sizeof(struct radv_bvh_box32_node) + bvh_states[i].leaf_node_count * leaf_node_size;
radv_update_buffer_cp(cmd_buffer,
- pInfos[i].scratchData.deviceAddress +
- bvh_states[i].scratch.header_offset +
+ pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset +
offsetof(struct radv_ir_header, dst_node_offset),
&dst_offset, sizeof(uint32_t));
}
const struct encode_args args = {
.intermediate_bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
- .output_bvh =
- vk_acceleration_structure_get_va(accel_struct) + bvh_states[i].accel_struct.bvh_offset,
+ .output_bvh = vk_acceleration_structure_get_va(accel_struct) + bvh_states[i].accel_struct.bvh_offset,
.header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
.output_bvh_offset = bvh_states[i].accel_struct.bvh_offset,
.leaf_node_count = bvh_states[i].leaf_node_count,
.geometry_type = geometry_type,
};
- radv_CmdPushConstants(commandBuffer,
- cmd_buffer->device->meta_state.accel_struct_build.encode_p_layout,
+ radv_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.encode_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args);
struct radv_dispatch_info dispatch = {
RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
size_t base = offsetof(struct radv_accel_struct_header, compacted_size);
- uint64_t instance_count = pInfos[i].type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR
- ? bvh_states[i].leaf_node_count
- : 0;
+ uint64_t instance_count =
+ pInfos[i].type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR ? bvh_states[i].leaf_node_count : 0;
if (bvh_states[i].config.compact) {
base = offsetof(struct radv_accel_struct_header, geometry_count);
.instance_count = instance_count,
};
- radv_CmdPushConstants(commandBuffer,
- cmd_buffer->device->meta_state.accel_struct_build.header_p_layout,
+ radv_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.header_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args);
radv_unaligned_dispatch(cmd_buffer, 1, 1, 1);
struct radv_accel_struct_header header;
- header.instance_offset =
- bvh_states[i].accel_struct.bvh_offset + sizeof(struct radv_bvh_box32_node);
+ header.instance_offset = bvh_states[i].accel_struct.bvh_offset + sizeof(struct radv_bvh_box32_node);
header.instance_count = instance_count;
header.compacted_size = bvh_states[i].accel_struct.size;
header.copy_dispatch_size[2] = 1;
header.serialization_size =
- header.compacted_size + align(sizeof(struct radv_accel_struct_serialization_header) +
- sizeof(uint64_t) * header.instance_count,
- 128);
+ header.compacted_size +
+ align(sizeof(struct radv_accel_struct_serialization_header) + sizeof(uint64_t) * header.instance_count, 128);
- header.size = header.serialization_size -
- sizeof(struct radv_accel_struct_serialization_header) -
+ header.size = header.serialization_size - sizeof(struct radv_accel_struct_serialization_header) -
sizeof(uint64_t) * header.instance_count;
header.build_flags = pInfos[i].flags;
static void
init_geometry_infos(VkCommandBuffer commandBuffer, uint32_t infoCount,
- const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
- struct bvh_state *bvh_states,
+ const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
{
for (uint32_t i = 0; i < infoCount; ++i) {
RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
- uint64_t geometry_infos_size =
- pInfos[i].geometryCount * sizeof(struct radv_accel_struct_geometry_info);
+ uint64_t geometry_infos_size = pInfos[i].geometryCount * sizeof(struct radv_accel_struct_geometry_info);
struct radv_accel_struct_geometry_info *geometry_infos = malloc(geometry_infos_size);
if (!geometry_infos)
}
radv_CmdUpdateBuffer(commandBuffer, accel_struct->buffer,
- accel_struct->offset + bvh_states[i].accel_struct.geometry_info_offset,
- geometry_infos_size, geometry_infos);
+ accel_struct->offset + bvh_states[i].accel_struct.geometry_info_offset, geometry_infos_size,
+ geometry_infos);
free(geometry_infos);
}
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdBuildAccelerationStructuresKHR(
- VkCommandBuffer commandBuffer, uint32_t infoCount,
- const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
- const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
+radv_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
+ const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
+ const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_meta_saved_state saved_state;
enum radv_cmd_flush_bits flush_bits =
RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT,
- NULL) |
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT,
- NULL);
-
- radv_meta_save(
- &saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT, NULL) |
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
struct bvh_state *bvh_states = calloc(infoCount, sizeof(struct bvh_state));
for (uint32_t i = 0; i < infoCount; ++i) {
.dispatch_size_z = 1,
};
- radv_update_buffer_cp(
- cmd_buffer, pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
- &header, sizeof(header));
+ radv_update_buffer_cp(cmd_buffer, pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
+ &header, sizeof(header));
}
cmd_buffer->state.flush_bits |= flush_bits;
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer,
- const VkCopyAccelerationStructureInfoKHR *pInfo)
+radv_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
return;
}
- radv_meta_save(
- &saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
cmd_buffer->device->meta_state.accel_struct_build.copy_pipeline);
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
+ cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout, VK_SHADER_STAGE_COMPUTE_BIT,
+ 0, sizeof(consts), &consts);
- cmd_buffer->state.flush_bits |=
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL);
+ cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL);
- radv_indirect_dispatch(cmd_buffer, src_buffer->bo,
- vk_acceleration_structure_get_va(src) +
- offsetof(struct radv_accel_struct_header, copy_dispatch_size));
+ radv_indirect_dispatch(
+ cmd_buffer, src_buffer->bo,
+ vk_acceleration_structure_get_va(src) + offsetof(struct radv_accel_struct_header, copy_dispatch_size));
radv_meta_restore(&saved_state, cmd_buffer);
}
VKAPI_ATTR void VKAPI_CALL
-radv_GetDeviceAccelerationStructureCompatibilityKHR(
- VkDevice _device, const VkAccelerationStructureVersionInfoKHR *pVersionInfo,
- VkAccelerationStructureCompatibilityKHR *pCompatibility)
+radv_GetDeviceAccelerationStructureCompatibilityKHR(VkDevice _device,
+ const VkAccelerationStructureVersionInfoKHR *pVersionInfo,
+ VkAccelerationStructureCompatibilityKHR *pCompatibility)
{
RADV_FROM_HANDLE(radv_device, device, _device);
bool compat =
memcmp(pVersionInfo->pVersionData, device->physical_device->driver_uuid, VK_UUID_SIZE) == 0 &&
- memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, device->physical_device->cache_uuid,
- VK_UUID_SIZE) == 0;
+ memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, device->physical_device->cache_uuid, VK_UUID_SIZE) == 0;
*pCompatibility = compat ? VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR
: VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
}
VKAPI_ATTR VkResult VKAPI_CALL
-radv_CopyMemoryToAccelerationStructureKHR(VkDevice _device,
- VkDeferredOperationKHR deferredOperation,
+radv_CopyMemoryToAccelerationStructureKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
unreachable("Unimplemented");
}
VKAPI_ATTR VkResult VKAPI_CALL
-radv_CopyAccelerationStructureToMemoryKHR(VkDevice _device,
- VkDeferredOperationKHR deferredOperation,
+radv_CopyAccelerationStructureToMemoryKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
{
unreachable("Unimplemented");
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdCopyMemoryToAccelerationStructureKHR(
- VkCommandBuffer commandBuffer, const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
+radv_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
+ const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
return;
}
- radv_meta_save(
- &saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
cmd_buffer->device->meta_state.accel_struct_build.copy_pipeline);
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
+ cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout, VK_SHADER_STAGE_COMPUTE_BIT,
+ 0, sizeof(consts), &consts);
vk_common_CmdDispatch(commandBuffer, 512, 1, 1);
radv_meta_restore(&saved_state, cmd_buffer);
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdCopyAccelerationStructureToMemoryKHR(
- VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
+radv_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
+ const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
return;
}
- radv_meta_save(
- &saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
cmd_buffer->device->meta_state.accel_struct_build.copy_pipeline);
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
+ cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout, VK_SHADER_STAGE_COMPUTE_BIT,
+ 0, sizeof(consts), &consts);
- cmd_buffer->state.flush_bits |=
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL);
+ cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL);
- radv_indirect_dispatch(cmd_buffer, src_buffer->bo,
- vk_acceleration_structure_get_va(src) +
- offsetof(struct radv_accel_struct_header, copy_dispatch_size));
+ radv_indirect_dispatch(
+ cmd_buffer, src_buffer->bo,
+ vk_acceleration_structure_get_va(src) + offsetof(struct radv_accel_struct_header, copy_dispatch_size));
radv_meta_restore(&saved_state, cmd_buffer);
/* Set the header of the serialized data. */
uint8_t header_data[2 * VK_UUID_SIZE];
memcpy(header_data, cmd_buffer->device->physical_device->driver_uuid, VK_UUID_SIZE);
- memcpy(header_data + VK_UUID_SIZE, cmd_buffer->device->physical_device->cache_uuid,
- VK_UUID_SIZE);
+ memcpy(header_data + VK_UUID_SIZE, cmd_buffer->device->physical_device->cache_uuid, VK_UUID_SIZE);
radv_update_buffer_cp(cmd_buffer, pInfo->dst.deviceAddress, header_data, sizeof(header_data));
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdBuildAccelerationStructuresIndirectKHR(
- VkCommandBuffer commandBuffer, uint32_t infoCount,
- const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
- const VkDeviceAddress *pIndirectDeviceAddresses, const uint32_t *pIndirectStrides,
- const uint32_t *const *ppMaxPrimitiveCounts)
+radv_CmdBuildAccelerationStructuresIndirectKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
+ const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
+ const VkDeviceAddress *pIndirectDeviceAddresses,
+ const uint32_t *pIndirectStrides,
+ const uint32_t *const *ppMaxPrimitiveCounts)
{
unreachable("Unimplemented");
}
#include "aco_shader_info.h"
-#define ASSIGN_FIELD(x) aco_info->x = radv->x
+#define ASSIGN_FIELD(x) aco_info->x = radv->x
#define ASSIGN_FIELD_CP(x) memcpy(&aco_info->x, &radv->x, sizeof(radv->x))
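/* For illustration: ASSIGN_FIELD(wave_size) expands to
 *    aco_info->wave_size = radv->wave_size;
 * while ASSIGN_FIELD_CP performs the equivalent memcpy for members that are
 * structs or arrays rather than assignable scalars. */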
static inline void radv_aco_convert_ps_epilog_key(struct aco_ps_epilog_info *aco_info,
static inline void
radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv_shader_info *radv,
- const struct radv_shader_args *radv_args,
- const struct radv_pipeline_key *radv_key)
+ const struct radv_shader_args *radv_args, const struct radv_pipeline_key *radv_key)
{
ASSIGN_FIELD(wave_size);
ASSIGN_FIELD(is_ngg);
aco_info->ps.epilog_pc = radv_args->ps_epilog_pc;
}
-#define ASSIGN_VS_STATE_FIELD(x) aco_info->state.x = radv->state->x
+#define ASSIGN_VS_STATE_FIELD(x) aco_info->state.x = radv->state->x
#define ASSIGN_VS_STATE_FIELD_CP(x) memcpy(&aco_info->state.x, &radv->state->x, sizeof(radv->state->x))
static inline void
-radv_aco_convert_vs_prolog_key(struct aco_vs_prolog_info *aco_info,
- const struct radv_vs_prolog_key *radv,
+radv_aco_convert_vs_prolog_key(struct aco_vs_prolog_info *aco_info, const struct radv_vs_prolog_key *radv,
const struct radv_shader_args *radv_args)
{
ASSIGN_VS_STATE_FIELD(instance_rate_inputs);
}
static inline void
-radv_aco_convert_ps_epilog_key(struct aco_ps_epilog_info *aco_info,
- const struct radv_ps_epilog_key *radv,
+radv_aco_convert_ps_epilog_key(struct aco_ps_epilog_info *aco_info, const struct radv_ps_epilog_key *radv,
const struct radv_shader_args *radv_args)
{
ASSIGN_FIELD(spi_shader_col_format);
}
static inline void
-radv_aco_convert_opts(struct aco_compiler_options *aco_info,
- const struct radv_nir_compiler_options *radv,
+radv_aco_convert_opts(struct aco_compiler_options *aco_info, const struct radv_nir_compiler_options *radv,
const struct radv_shader_args *radv_args)
{
ASSIGN_FIELD(dump_shader);
VkResult
radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info,
- const VkNativeBufferANDROID *gralloc_info,
- const VkAllocationCallbacks *alloc, VkImage *out_image_h)
+ const VkNativeBufferANDROID *gralloc_info, const VkAllocationCallbacks *alloc,
+ VkImage *out_image_h)
{
RADV_FROM_HANDLE(radv_device, device, device_h);
radv_image_override_offset_stride(device, image, 0, gralloc_info->stride);
- VkBindImageMemoryInfo bind_info = {
- .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
- .image = image_h,
- .memory = memory_h,
- .memoryOffset = 0
- };
+ VkBindImageMemoryInfo bind_info = {.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
+ .image = image_h,
+ .memory = memory_h,
+ .memoryOffset = 0};
radv_BindImageMemory2(device_h, 1, &bind_info);
image->owned_memory = memory_h;
}
VkResult
-radv_GetSwapchainGrallocUsageANDROID(VkDevice device_h, VkFormat format,
- VkImageUsageFlags imageUsage, int *grallocUsage)
+radv_GetSwapchainGrallocUsageANDROID(VkDevice device_h, VkFormat format, VkImageUsageFlags imageUsage,
+ int *grallocUsage)
{
RADV_FROM_HANDLE(radv_device, device, device_h);
struct radv_physical_device *phys_dev = device->physical_device;
};
/* Check that requested format and usage are supported. */
- result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h, &image_format_info,
- &image_format_props);
+ result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h, &image_format_info, &image_format_props);
if (result != VK_SUCCESS) {
return vk_errorf(device, result,
"radv_GetPhysicalDeviceImageFormatProperties2 failed "
if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
*grallocUsage |= GRALLOC_USAGE_HW_RENDER;
- if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT |
- VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))
+ if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT |
+ VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))
*grallocUsage |= GRALLOC_USAGE_HW_TEXTURE;
/* All VkImageUsageFlags not explicitly checked here are unsupported for
* what we need for 30-bit colors.
*/
if (format == VK_FORMAT_B8G8R8A8_UNORM || format == VK_FORMAT_B5G6R5_UNORM_PACK16) {
- *grallocUsage |=
- GRALLOC_USAGE_HW_FB | GRALLOC_USAGE_HW_COMPOSER | GRALLOC_USAGE_EXTERNAL_DISP;
+ *grallocUsage |= GRALLOC_USAGE_HW_FB | GRALLOC_USAGE_HW_COMPOSER | GRALLOC_USAGE_EXTERNAL_DISP;
}
if (*grallocUsage == 0)
}
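/* The unmask32() helper used above is assumed to clear the tested bits from
 * *inout_mask and report whether any of them were set, so each supported
 * usage flag is consumed exactly once. A minimal sketch of those semantics:
 */
static inline bool
example_unmask32(uint32_t *inout_mask, uint32_t test_mask)
{
   uint32_t orig_mask = *inout_mask;
   *inout_mask &= ~test_mask;
   return *inout_mask != orig_mask;
}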
VkResult
-radv_GetSwapchainGrallocUsage2ANDROID(VkDevice device_h, VkFormat format,
- VkImageUsageFlags imageUsage,
+radv_GetSwapchainGrallocUsage2ANDROID(VkDevice device_h, VkFormat format, VkImageUsageFlags imageUsage,
VkSwapchainImageUsageFlagsANDROID swapchainImageUsage,
- uint64_t *grallocConsumerUsage,
- uint64_t *grallocProducerUsage)
+ uint64_t *grallocConsumerUsage, uint64_t *grallocProducerUsage)
{
/* Before level 26 (Android 8.0/Oreo) the loader uses
* vkGetSwapchainGrallocUsageANDROID. */
};
/* Check that requested format and usage are supported. */
- result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h, &image_format_info,
- &image_format_props);
+ result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h, &image_format_info, &image_format_props);
if (result != VK_SUCCESS) {
return vk_errorf(device, result,
"radv_GetPhysicalDeviceImageFormatProperties2 failed "
__func__);
}
- if (unmask32(&imageUsage,
- VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) {
+ if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) {
*grallocProducerUsage |= GRALLOC1_PRODUCER_USAGE_GPU_RENDER_TARGET;
*grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_CLIENT_TARGET;
}
- if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT |
- VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
+ if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT |
+ VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
*grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_GPU_TEXTURE;
}
AHardwareBuffer_describe(buffer, &desc);
/* Verify description. */
- const uint64_t gpu_usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE |
- AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT |
+ const uint64_t gpu_usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE | AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT |
AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
/* "Buffer must be a valid Android hardware buffer object with at least
p->format = vk_format_from_android(desc.format, desc.usage);
p->externalFormat = (uint64_t)(uintptr_t)p->format;
- VkFormatProperties2 format_properties = {
- .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2
- };
+ VkFormatProperties2 format_properties = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2};
- radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(device->physical_device),
- p->format, &format_properties);
+ radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(device->physical_device), p->format,
+ &format_properties);
if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER)
p->formatFeatures = format_properties.formatProperties.linearTilingFeatures;
AHardwareBuffer_describe(buffer, &desc);
/* Verify description. */
- const uint64_t gpu_usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE |
- AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT |
+ const uint64_t gpu_usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE | AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT |
AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
/* "Buffer must be a valid Android hardware buffer object with at least
p->format = vk_format_from_android(desc.format, desc.usage);
p->externalFormat = (uint64_t)(uintptr_t)p->format;
- VkFormatProperties2 format_properties = {
- .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2
- };
+ VkFormatProperties2 format_properties = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2};
- radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(device->physical_device),
- p->format, &format_properties);
+ radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(device->physical_device), p->format,
+ &format_properties);
if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER)
p->formatFeatures = format_properties.formatProperties.linearTilingFeatures;
}
VkResult
-radv_GetAndroidHardwareBufferPropertiesANDROID(VkDevice device_h,
- const struct AHardwareBuffer *buffer,
+radv_GetAndroidHardwareBufferPropertiesANDROID(VkDevice device_h, const struct AHardwareBuffer *buffer,
VkAndroidHardwareBufferPropertiesANDROID *pProperties)
{
RADV_FROM_HANDLE(radv_device, dev, device_h);
}
VkResult
-radv_GetMemoryAndroidHardwareBufferANDROID(VkDevice device_h,
- const VkMemoryGetAndroidHardwareBufferInfoANDROID *pInfo,
+radv_GetMemoryAndroidHardwareBufferANDROID(VkDevice device_h, const VkMemoryGetAndroidHardwareBufferInfoANDROID *pInfo,
struct AHardwareBuffer **pBuffer)
{
RADV_FROM_HANDLE(radv_device_memory, mem, pInfo->memory);
radv_select_android_external_format(const void *next, VkFormat default_format)
{
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
- const VkExternalFormatANDROID *android_format =
- vk_find_struct_const(next, EXTERNAL_FORMAT_ANDROID);
+ const VkExternalFormatANDROID *android_format = vk_find_struct_const(next, EXTERNAL_FORMAT_ANDROID);
if (android_format && android_format->externalFormat) {
return (VkFormat)android_format->externalFormat;
}
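/* Usage sketch (application side, names hypothetical): an Android external
 * format is selected by chaining VkExternalFormatANDROID into the pNext list,
 * with the core format left as VK_FORMAT_UNDEFINED; this is the chain the
 * lookup above consumes.
 */
static void
example_chain_external_format(uint64_t ahb_external_format)
{
   VkExternalFormatANDROID ext_format = {
      .sType = VK_STRUCTURE_TYPE_EXTERNAL_FORMAT_ANDROID,
      .externalFormat = ahb_external_format,
   };
   VkImageCreateInfo image_info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
      .pNext = &ext_format,
      .format = VK_FORMAT_UNDEFINED, /* required when a non-zero external format is used */
   };
   (void)image_info;
}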
VkResult
-radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
- unsigned priority, const VkImportAndroidHardwareBufferInfoANDROID *info)
+radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem, unsigned priority,
+ const VkImportAndroidHardwareBufferInfoANDROID *info)
{
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
/* Import from AHardwareBuffer to radv_device_memory. */
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
uint64_t alloc_size = 0;
- VkResult result =
- device->ws->buffer_from_fd(device->ws, dma_buf, priority, &mem->bo, &alloc_size);
+ VkResult result = device->ws->buffer_from_fd(device->ws, dma_buf, priority, &mem->bo, &alloc_size);
if (result != VK_SUCCESS)
return result;
struct radeon_bo_metadata metadata;
device->ws->buffer_get_metadata(device->ws, mem->bo, &metadata);
- struct radv_image_create_info create_info = {.no_metadata_planes = true,
- .bo_metadata = &metadata};
+ struct radv_image_create_info create_info = {.no_metadata_planes = true, .bo_metadata = &metadata};
result = radv_image_create_layout(device, create_info, NULL, NULL, mem->image);
if (result != VK_SUCCESS) {
}
VkResult
-radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
- unsigned priority, const VkMemoryAllocateInfo *pAllocateInfo)
+radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem, unsigned priority,
+ const VkMemoryAllocateInfo *pAllocateInfo)
{
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
mem->android_hardware_buffer = vk_alloc_ahardware_buffer(pAllocateInfo);
#include "vk_common_entrypoints.h"
void
-radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device,
- struct radeon_winsys_bo *bo, uint64_t size,
+radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device, struct radeon_winsys_bo *bo, uint64_t size,
uint64_t offset)
{
VkBufferCreateInfo createInfo = {
}
static void
-radv_destroy_buffer(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
- struct radv_buffer *buffer)
+radv_destroy_buffer(struct radv_device *device, const VkAllocationCallbacks *pAllocator, struct radv_buffer *buffer)
{
if ((buffer->vk.create_flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) && buffer->bo)
device->ws->buffer_destroy(device->ws, buffer->bo);
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
#endif
- buffer = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ buffer = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (buffer == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (replay_info && replay_info->opaqueCaptureAddress)
replay_address = replay_info->opaqueCaptureAddress;
- VkResult result =
- device->ws->buffer_create(device->ws, align64(buffer->vk.size, 4096), 4096, 0, flags,
- RADV_BO_PRIORITY_VIRTUAL, replay_address, &buffer->bo);
+ VkResult result = device->ws->buffer_create(device->ws, align64(buffer->vk.size, 4096), 4096, 0, flags,
+ RADV_BO_PRIORITY_VIRTUAL, replay_address, &buffer->bo);
if (result != VK_SUCCESS) {
radv_destroy_buffer(device, pAllocator, buffer);
return vk_error(device, result);
}
VKAPI_ATTR VkResult VKAPI_CALL
-radv_CreateBuffer(VkDevice _device, const VkBufferCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer)
+radv_CreateBuffer(VkDevice _device, const VkBufferCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator,
+ VkBuffer *pBuffer)
{
RADV_FROM_HANDLE(radv_device, device, _device);
return radv_create_buffer(device, pCreateInfo, pAllocator, pBuffer, false);
}
VKAPI_ATTR VkResult VKAPI_CALL
-radv_BindBufferMemory2(VkDevice _device, uint32_t bindInfoCount,
- const VkBindBufferMemoryInfo *pBindInfos)
+radv_BindBufferMemory2(VkDevice _device, uint32_t bindInfoCount, const VkBindBufferMemoryInfo *pBindInfos)
{
RADV_FROM_HANDLE(radv_device, device, _device);
vk_common_GetBufferMemoryRequirements2(_device, &info, &reqs);
if (pBindInfos[i].memoryOffset + reqs.memoryRequirements.size > mem->alloc_size) {
- return vk_errorf(device, VK_ERROR_UNKNOWN,
- "Device memory object too small for the buffer.\n");
+ return vk_errorf(device, VK_ERROR_UNKNOWN, "Device memory object too small for the buffer.\n");
}
}
}
static void
-radv_get_buffer_memory_requirements(struct radv_device *device, VkDeviceSize size,
- VkBufferCreateFlags flags, VkBufferUsageFlags usage,
- VkMemoryRequirements2 *pMemoryRequirements)
+radv_get_buffer_memory_requirements(struct radv_device *device, VkDeviceSize size, VkBufferCreateFlags flags,
+ VkBufferUsageFlags usage, VkMemoryRequirements2 *pMemoryRequirements)
{
pMemoryRequirements->memoryRequirements.memoryTypeBits =
((1u << device->physical_device->memory_properties.memoryTypeCount) - 1u) &
* intersection is non-zero at least)
*/
if ((usage & VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT) && device->uses_device_generated_commands)
- pMemoryRequirements->memoryRequirements.memoryTypeBits |=
- device->physical_device->memory_types_32bit;
+ pMemoryRequirements->memoryRequirements.memoryTypeBits |= device->physical_device->memory_types_32bit;
/* Force 32-bit address-space for descriptor buffers usage because they are passed to shaders
* through 32-bit pointers.
*/
- if (usage & (VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT |
- VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT))
- pMemoryRequirements->memoryRequirements.memoryTypeBits =
- device->physical_device->memory_types_32bit;
+ if (usage & (VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT))
+ pMemoryRequirements->memoryRequirements.memoryTypeBits = device->physical_device->memory_types_32bit;
if (flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
pMemoryRequirements->memoryRequirements.alignment = 4096;
* be 64 byte aligned.
*/
if (usage & VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR)
- pMemoryRequirements->memoryRequirements.alignment =
- MAX2(pMemoryRequirements->memoryRequirements.alignment, 64);
+ pMemoryRequirements->memoryRequirements.alignment = MAX2(pMemoryRequirements->memoryRequirements.alignment, 64);
- pMemoryRequirements->memoryRequirements.size =
- align64(size, pMemoryRequirements->memoryRequirements.alignment);
+ pMemoryRequirements->memoryRequirements.size = align64(size, pMemoryRequirements->memoryRequirements.alignment);
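/* Worked example: a 100-byte acceleration-structure storage buffer with the
 * 64-byte alignment above reports a size of 128, since align64() rounds up
 * to the next multiple of the alignment. */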
- vk_foreach_struct(ext, pMemoryRequirements->pNext)
- {
+ vk_foreach_struct (ext, pMemoryRequirements->pNext) {
switch (ext->sType) {
case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;
}
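/* Caller-side sketch: chaining VkMemoryDedicatedRequirements into
 * VkMemoryRequirements2 is what lets the pNext walk above fill it in.
 */
static void
example_query_dedicated_reqs(VkDevice device, const VkBufferMemoryRequirementsInfo2 *info)
{
   VkMemoryDedicatedRequirements dedicated = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
   };
   VkMemoryRequirements2 reqs = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
      .pNext = &dedicated,
   };
   vkGetBufferMemoryRequirements2(device, info, &reqs);
   /* dedicated.prefersDedicatedAllocation and .requiresDedicatedAllocation are now valid. */
}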
VKAPI_ATTR void VKAPI_CALL
-radv_GetDeviceBufferMemoryRequirements(VkDevice _device,
- const VkDeviceBufferMemoryRequirements *pInfo,
+radv_GetDeviceBufferMemoryRequirements(VkDevice _device, const VkDeviceBufferMemoryRequirements *pInfo,
VkMemoryRequirements2 *pMemoryRequirements)
{
RADV_FROM_HANDLE(radv_device, device, _device);
* IN THE SOFTWARE.
*/
+#include "meta/radv_meta.h"
#include "radv_cs.h"
#include "radv_debug.h"
-#include "meta/radv_meta.h"
#include "radv_private.h"
#include "radv_radeon_winsys.h"
#include "radv_shader.h"
#include "sid.h"
-#include "vk_format.h"
-#include "vk_util.h"
-#include "vk_enum_defines.h"
#include "vk_common_entrypoints.h"
-#include "vk_render_pass.h"
+#include "vk_enum_defines.h"
+#include "vk_format.h"
#include "vk_framebuffer.h"
+#include "vk_render_pass.h"
+#include "vk_util.h"
#include "ac_debug.h"
#include "ac_shader_args.h"
RADV_PREFETCH_GS = (1 << 4),
RADV_PREFETCH_PS = (1 << 5),
RADV_PREFETCH_MS = (1 << 6),
- RADV_PREFETCH_SHADERS = (RADV_PREFETCH_VS | RADV_PREFETCH_TCS | RADV_PREFETCH_TES |
- RADV_PREFETCH_GS | RADV_PREFETCH_PS | RADV_PREFETCH_MS)
+ RADV_PREFETCH_SHADERS = (RADV_PREFETCH_VS | RADV_PREFETCH_TCS | RADV_PREFETCH_TES | RADV_PREFETCH_GS |
+ RADV_PREFETCH_PS | RADV_PREFETCH_MS)
};
-static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- VkImageLayout src_layout, VkImageLayout dst_layout,
- uint32_t src_family_index, uint32_t dst_family_index,
- const VkImageSubresourceRange *range,
+static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ VkImageLayout src_layout, VkImageLayout dst_layout, uint32_t src_family_index,
+ uint32_t dst_family_index, const VkImageSubresourceRange *range,
struct radv_sample_locations_state *sample_locs);
static void
dest_mask |= RADV_DYNAMIC_VIEWPORT;
}
- if (memcmp(&dest->vk.vp.viewports, &src->vk.vp.viewports,
- src->vk.vp.viewport_count * sizeof(VkViewport))) {
+ if (memcmp(&dest->vk.vp.viewports, &src->vk.vp.viewports, src->vk.vp.viewport_count * sizeof(VkViewport))) {
typed_memcpy(dest->vk.vp.viewports, src->vk.vp.viewports, src->vk.vp.viewport_count);
typed_memcpy(dest->hw_vp.xform, src->hw_vp.xform, src->vk.vp.viewport_count);
dest_mask |= RADV_DYNAMIC_VIEWPORT;
dest_mask |= RADV_DYNAMIC_SCISSOR;
}
- if (memcmp(&dest->vk.vp.scissors, &src->vk.vp.scissors,
- src->vk.vp.scissor_count * sizeof(VkRect2D))) {
+ if (memcmp(&dest->vk.vp.scissors, &src->vk.vp.scissors, src->vk.vp.scissor_count * sizeof(VkRect2D))) {
typed_memcpy(dest->vk.vp.scissors, src->vk.vp.scissors, src->vk.vp.scissor_count);
dest_mask |= RADV_DYNAMIC_SCISSOR;
}
}
if (copy_mask & RADV_DYNAMIC_DISCARD_RECTANGLE) {
- if (memcmp(&dest->vk.dr.rectangles, &src->vk.dr.rectangles,
- src->vk.dr.rectangle_count * sizeof(VkRect2D))) {
+ if (memcmp(&dest->vk.dr.rectangles, &src->vk.dr.rectangles, src->vk.dr.rectangle_count * sizeof(VkRect2D))) {
typed_memcpy(dest->vk.dr.rectangles, src->vk.dr.rectangles, src->vk.dr.rectangle_count);
dest_mask |= RADV_DYNAMIC_DISCARD_RECTANGLE;
}
src->sample_location.count * sizeof(VkSampleLocationEXT))) {
dest->sample_location.per_pixel = src->sample_location.per_pixel;
dest->sample_location.grid_size = src->sample_location.grid_size;
- typed_memcpy(dest->sample_location.locations, src->sample_location.locations,
- src->sample_location.count);
+ typed_memcpy(dest->sample_location.locations, src->sample_location.locations, src->sample_location.count);
dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
}
}
}
}
-#define RADV_CMP_COPY(field, flag) \
- if (copy_mask & flag) { \
- if (dest->field != src->field) { \
- dest->field = src->field; \
- dest_mask |= flag; \
- } \
+#define RADV_CMP_COPY(field, flag) \
+ if (copy_mask & flag) { \
+ if (dest->field != src->field) { \
+ dest->field = src->field; \
+ dest_mask |= flag; \
+ } \
}
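/* For illustration, the first use below,
 *    RADV_CMP_COPY(vk.ia.primitive_topology, RADV_DYNAMIC_PRIMITIVE_TOPOLOGY);
 * expands to a guarded compare-and-copy that only marks the state dirty when
 * the value actually changes:
 *    if (copy_mask & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) {
 *       if (dest->vk.ia.primitive_topology != src->vk.ia.primitive_topology) {
 *          dest->vk.ia.primitive_topology = src->vk.ia.primitive_topology;
 *          dest_mask |= RADV_DYNAMIC_PRIMITIVE_TOPOLOGY;
 *       }
 *    }
 */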
RADV_CMP_COPY(vk.ia.primitive_topology, RADV_DYNAMIC_PRIMITIVE_TOPOLOGY);
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_GUARDBAND;
}
- if (cmd_buffer->device->physical_device->rad_info.rbplus_allowed &&
- (dest_mask & RADV_DYNAMIC_COLOR_WRITE_MASK)) {
+ if (cmd_buffer->device->physical_device->rad_info.rbplus_allowed && (dest_mask & RADV_DYNAMIC_COLOR_WRITE_MASK)) {
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_RBPLUS;
}
}
bool
radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer)
{
- return cmd_buffer->qf == RADV_QUEUE_COMPUTE &&
- cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7;
+ return cmd_buffer->qf == RADV_QUEUE_COMPUTE && cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7;
}
enum amd_ip_type
-radv_queue_family_to_ring(const struct radv_physical_device *physical_device,
- enum radv_queue_family f)
+radv_queue_family_to_ring(const struct radv_physical_device *physical_device, enum radv_queue_family f)
{
switch (f) {
case RADV_QUEUE_GENERAL:
}
static void
-radv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va,
- unsigned count, const uint32_t *data)
+radv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va, unsigned count,
+ const uint32_t *data)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
}
static void
-radv_emit_clear_data(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va,
- unsigned size)
+radv_emit_clear_data(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va, unsigned size)
{
uint32_t *zeroes = alloca(size);
memset(zeroes, 0, size);
{
struct radv_cmd_buffer *cmd_buffer = container_of(vk_cmd_buffer, struct radv_cmd_buffer, vk);
- list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list)
- {
+ list_for_each_entry_safe (struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list) {
radv_rmv_log_command_buffer_bo_destroy(cmd_buffer->device, up->upload_bo);
cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo);
list_del(&up->list);
}
static VkResult
-radv_create_cmd_buffer(struct vk_command_pool *pool,
- struct vk_command_buffer **cmd_buffer_out)
+radv_create_cmd_buffer(struct vk_command_pool *pool, struct vk_command_buffer **cmd_buffer_out)
{
struct radv_device *device = container_of(pool->base.device, struct radv_device, vk);
struct radv_cmd_buffer *cmd_buffer;
unsigned ring;
- cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (cmd_buffer == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
- VkResult result =
- vk_command_buffer_init(pool, &cmd_buffer->vk, &radv_cmd_buffer_ops, 0);
+ VkResult result = vk_command_buffer_init(pool, &cmd_buffer->vk, &radv_cmd_buffer_ops, 0);
if (result != VK_SUCCESS) {
vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer);
return result;
ring = radv_queue_family_to_ring(device->physical_device, cmd_buffer->qf);
- cmd_buffer->cs = device->ws->cs_create(
- device->ws, ring, cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
+ cmd_buffer->cs = device->ws->cs_create(device->ws, ring, cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
if (!cmd_buffer->cs) {
radv_destroy_cmd_buffer(&cmd_buffer->vk);
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
- vk_object_base_init(&device->vk, &cmd_buffer->meta_push_descriptors.base,
- VK_OBJECT_TYPE_DESCRIPTOR_SET);
+ vk_object_base_init(&device->vk, &cmd_buffer->meta_push_descriptors.base, VK_OBJECT_TYPE_DESCRIPTOR_SET);
for (unsigned i = 0; i < MAX_BIND_POINTS; i++)
- vk_object_base_init(&device->vk, &cmd_buffer->descriptors[i].push_set.set.base,
- VK_OBJECT_TYPE_DESCRIPTOR_SET);
+ vk_object_base_init(&device->vk, &cmd_buffer->descriptors[i].push_set.set.base, VK_OBJECT_TYPE_DESCRIPTOR_SET);
*cmd_buffer_out = &cmd_buffer->vk;
}
static void
-radv_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer,
- UNUSED VkCommandBufferResetFlags flags)
+radv_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, UNUSED VkCommandBufferResetFlags flags)
{
struct radv_cmd_buffer *cmd_buffer = container_of(vk_cmd_buffer, struct radv_cmd_buffer, vk);
if (cmd_buffer->gang.cs)
cmd_buffer->device->ws->cs_reset(cmd_buffer->gang.cs);
- list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list)
- {
+ list_for_each_entry_safe (struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list) {
radv_rmv_log_command_buffer_bo_destroy(cmd_buffer->device, up->upload_bo);
cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo);
list_del(&up->list);
new_size = MAX2(min_needed, 16 * 1024);
new_size = MAX2(new_size, 2 * cmd_buffer->upload.size);
- VkResult result =
- device->ws->buffer_create(device->ws, new_size, 4096, device->ws->cs_domain(device->ws),
- RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC,
- RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &bo);
+ VkResult result = device->ws->buffer_create(
+ device->ws, new_size, 4096, device->ws->cs_domain(device->ws),
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC,
+ RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &bo);
if (result != VK_SUCCESS) {
vk_command_buffer_set_error(&cmd_buffer->vk, result);
vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_DEVICE_MEMORY);
return false;
}
- radv_rmv_log_command_buffer_bo_create(device, cmd_buffer->upload.upload_bo, 0,
- cmd_buffer->upload.size, 0);
+ radv_rmv_log_command_buffer_bo_create(device, cmd_buffer->upload.upload_bo, 0, cmd_buffer->upload.size, 0);
return true;
}
bool
-radv_cmd_buffer_upload_alloc_aligned(struct radv_cmd_buffer *cmd_buffer, unsigned size,
- unsigned alignment,
+radv_cmd_buffer_upload_alloc_aligned(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned alignment,
unsigned *out_offset, void **ptr)
{
assert(size % 4 == 0);
}
bool
-radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size,
- unsigned *out_offset, void **ptr)
+radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned *out_offset, void **ptr)
{
return radv_cmd_buffer_upload_alloc_aligned(cmd_buffer, size, 0, out_offset, ptr);
}
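/* Usage sketch (hypothetical caller): small immediate data is placed in the
 * command buffer's upload BO and then addressed by its VA. The size must be a
 * multiple of 4, per the assert in the aligned variant above.
 */
static void
example_upload_scratch(struct radv_cmd_buffer *cmd_buffer)
{
   unsigned offset;
   void *ptr;
   if (radv_cmd_buffer_upload_alloc(cmd_buffer, 16, &offset, &ptr)) {
      memset(ptr, 0, 16); /* fill with the payload a real caller would upload */
      uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + offset;
      (void)va; /* packets that consume the data would read from this VA */
   }
}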
bool
-radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, const void *data,
- unsigned *out_offset)
+radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, const void *data, unsigned *out_offset)
{
uint8_t *ptr;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
uint64_t va;
- if (cmd_buffer->qf != RADV_QUEUE_GENERAL &&
- cmd_buffer->qf != RADV_QUEUE_COMPUTE)
+ if (cmd_buffer->qf != RADV_QUEUE_GENERAL && cmd_buffer->qf != RADV_QUEUE_COMPUTE)
return;
va = radv_buffer_get_va(device->trace_bo);
static void
radv_gang_barrier(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_stage_mask,
- VkPipelineStageFlags2 dst_stage_mask)
+ VkPipelineStageFlags2 dst_stage_mask)
{
/* Update flush bits from the main cmdbuf, except the stage flush. */
cmd_buffer->gang.flush_bits |=
cmd_buffer->state.flush_bits & RADV_CMD_FLUSH_ALL_COMPUTE & ~RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
/* Add stage flush only when necessary. */
- if (src_stage_mask &
- (VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT | VK_PIPELINE_STAGE_2_TRANSFER_BIT |
- VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT))
+ if (src_stage_mask & (VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT | VK_PIPELINE_STAGE_2_TRANSFER_BIT |
+ VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT))
cmd_buffer->gang.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
/* Block task shaders when we have to wait for CP DMA on the GFX cmdbuf. */
if (src_stage_mask &
- (VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_CLEAR_BIT |
- VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT |
- VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT))
+ (VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_CLEAR_BIT | VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT |
+ VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT))
dst_stage_mask |= cmd_buffer->state.dma_is_busy ? VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT : 0;
/* Increment the GFX/ACE semaphore when task shaders are blocked. */
- if (dst_stage_mask &
- (VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR | VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT |
- VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT))
+ if (dst_stage_mask & (VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR | VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT |
+ VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT))
cmd_buffer->gang.sem.leader_value++;
}
const uint32_t flush_bits = cmd_buffer->gang.flush_bits;
enum rgp_flush_bits sqtt_flush_bits = 0;
- si_cs_emit_cache_flush(cmd_buffer->device->ws, ace_cs,
- cmd_buffer->device->physical_device->rad_info.gfx_level, NULL, 0, true,
- flush_bits, &sqtt_flush_bits, 0);
+ si_cs_emit_cache_flush(cmd_buffer->device->ws, ace_cs, cmd_buffer->device->physical_device->rad_info.gfx_level, NULL,
+ 0, true, flush_bits, &sqtt_flush_bits, 0);
cmd_buffer->gang.flush_bits = 0;
}
static bool
radv_gang_leader_sem_dirty(const struct radv_cmd_buffer *cmd_buffer)
{
- return cmd_buffer->gang.sem.leader_value !=
- cmd_buffer->gang.sem.emitted_leader_value;
+ return cmd_buffer->gang.sem.leader_value != cmd_buffer->gang.sem.emitted_leader_value;
}
ALWAYS_INLINE static bool
ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 12);
/* GFX writes a value to the semaphore which ACE can wait for. */
- si_cs_emit_write_event_eop(
- cmd_buffer->cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
- radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
- EOP_DATA_SEL_VALUE_32BIT, cmd_buffer->gang.sem.va,
- cmd_buffer->gang.sem.leader_value, cmd_buffer->gfx9_eop_bug_va);
+ si_cs_emit_write_event_eop(cmd_buffer->cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
+ radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
+ EOP_DATA_SEL_VALUE_32BIT, cmd_buffer->gang.sem.va, cmd_buffer->gang.sem.leader_value,
+ cmd_buffer->gfx9_eop_bug_va);
cmd_buffer->gang.sem.emitted_leader_value = cmd_buffer->gang.sem.leader_value;
radeon_check_space(cmd_buffer->device->ws, ace_cs, 7);
/* ACE waits for the semaphore which GFX wrote. */
- radv_cp_wait_mem(ace_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, cmd_buffer->gang.sem.va,
- cmd_buffer->gang.sem.leader_value, 0xffffffff);
+ radv_cp_wait_mem(ace_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, cmd_buffer->gang.sem.va, cmd_buffer->gang.sem.leader_value,
+ 0xffffffff);
}
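/* In short, the gang handshake pairs one bottom-of-pipe EOP release on GFX
 * with one WAIT_REG_MEM acquire on ACE over the same semaphore VA: GFX bumps
 * leader_value and writes it to memory, and ACE blocks until the value there
 * is >= that target, so task shader work cannot start early. */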
static struct radeon_cmdbuf *
{
assert(!cmd_buffer->gang.cs);
struct radv_device *device = cmd_buffer->device;
- struct radeon_cmdbuf *ace_cs = device->ws->cs_create(
- device->ws, AMD_IP_COMPUTE, cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
+ struct radeon_cmdbuf *ace_cs =
+ device->ws->cs_create(device->ws, AMD_IP_COMPUTE, cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
if (!ace_cs)
vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
assert(flags & (RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH));
/* Force wait for graphics or compute engines to be idle. */
- si_cs_emit_cache_flush(device->ws, cmd_buffer->cs,
- device->physical_device->rad_info.gfx_level,
+ si_cs_emit_cache_flush(device->ws, cmd_buffer->cs, device->physical_device->rad_info.gfx_level,
&cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va,
radv_cmd_buffer_uses_mec(cmd_buffer), flags, &sqtt_flush_bits,
cmd_buffer->gfx9_eop_bug_va);
if (cmd_buffer->state.graphics_pipeline && (flags & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) &&
radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
/* Force wait for compute engines to be idle on the internal cmdbuf. */
- si_cs_emit_cache_flush(device->ws, cmd_buffer->gang.cs,
- device->physical_device->rad_info.gfx_level, NULL, 0, true,
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH, &sqtt_flush_bits, 0);
+ si_cs_emit_cache_flush(device->ws, cmd_buffer->gang.cs, device->physical_device->rad_info.gfx_level, NULL, 0,
+ true, RADV_CMD_FLAG_CS_PARTIAL_FLUSH, &sqtt_flush_bits, 0);
}
}
radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point,
struct radv_descriptor_set *set, unsigned idx)
{
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
+ struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point);
descriptors_state->sets[idx] = set;
static void
radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
{
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
+ struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point);
struct radv_device *device = cmd_buffer->device;
uint32_t data[MAX_SETS * 2] = {0};
uint64_t va;
va = radv_buffer_get_va(device->trace_bo) + 40;
- u_foreach_bit(i, descriptors_state->valid)
- {
+ u_foreach_bit (i, descriptors_state->valid) {
struct radv_descriptor_set *set = descriptors_state->sets[i];
data[i * 2] = (uint64_t)(uintptr_t)set;
data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32;
}
static void
-radv_emit_userdata_address(struct radv_device *device, struct radeon_cmdbuf *cs,
- struct radv_shader *shader, uint32_t base_reg, int idx, uint64_t va)
+radv_emit_userdata_address(struct radv_device *device, struct radeon_cmdbuf *cs, struct radv_shader *shader,
+ uint32_t base_reg, int idx, uint64_t va)
{
const struct radv_userdata_info *loc = &shader->info.user_sgprs_locs.shader_data[idx];
}
static void
-radv_emit_descriptor_pointers(struct radv_device *device, struct radeon_cmdbuf *cs,
- struct radv_shader *shader, uint32_t sh_base,
- struct radv_descriptor_state *descriptors_state)
+radv_emit_descriptor_pointers(struct radv_device *device, struct radeon_cmdbuf *cs, struct radv_shader *shader,
+ uint32_t sh_base, struct radv_descriptor_state *descriptors_state)
{
struct radv_userdata_locations *locs = &shader->info.user_sgprs_locs;
unsigned mask = locs->descriptor_sets_enabled;
const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader;
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- if (cmd_buffer->state.active_stages & (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
- VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT |
- VK_SHADER_STAGE_GEOMETRY_BIT |
- VK_SHADER_STAGE_MESH_BIT_EXT)) {
+ if (cmd_buffer->state.active_stages &
+ (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT |
+ VK_SHADER_STAGE_GEOMETRY_BIT | VK_SHADER_STAGE_MESH_BIT_EXT)) {
/* Ignore dynamic primitive topology for TES/GS/MS stages. */
return cmd_buffer->state.rast_prim;
}
* that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
*/
static void
-radv_convert_user_sample_locs(const struct radv_sample_locations_state *state,
- uint32_t x, uint32_t y, VkOffset2D *sample_locs)
+radv_convert_user_sample_locs(const struct radv_sample_locations_state *state, uint32_t x, uint32_t y,
+ VkOffset2D *sample_locs)
{
uint32_t x_offset = x % state->grid_size.width;
uint32_t y_offset = y % state->grid_size.height;
* locations.
*/
static void
-radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs,
- uint32_t *sample_locs_pixel)
+radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs, uint32_t *sample_locs_pixel)
{
for (uint32_t i = 0; i < num_samples; i++) {
uint32_t sample_reg_idx = i / 4;
* sample locations.
*/
static uint64_t
-radv_compute_centroid_priority(struct radv_cmd_buffer *cmd_buffer, VkOffset2D *sample_locs,
- uint32_t num_samples)
+radv_compute_centroid_priority(struct radv_cmd_buffer *cmd_buffer, VkOffset2D *sample_locs, uint32_t num_samples)
{
uint32_t *centroid_priorities = alloca(num_samples * sizeof(*centroid_priorities));
uint32_t sample_mask = num_samples - 1;
switch (num_samples) {
case 2:
case 4:
- radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0,
- sample_locs_pixel[0][0]);
- radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0,
- sample_locs_pixel[1][0]);
- radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0,
- sample_locs_pixel[2][0]);
- radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0,
- sample_locs_pixel[3][0]);
+ radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]);
+ radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]);
+ radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]);
+ radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]);
break;
case 8:
- radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0,
- sample_locs_pixel[0][0]);
- radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0,
- sample_locs_pixel[1][0]);
- radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0,
- sample_locs_pixel[2][0]);
- radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0,
- sample_locs_pixel[3][0]);
- radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1,
- sample_locs_pixel[0][1]);
- radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1,
- sample_locs_pixel[1][1]);
- radeon_set_context_reg(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1,
- sample_locs_pixel[2][1]);
- radeon_set_context_reg(cs, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1,
- sample_locs_pixel[3][1]);
+ radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]);
+ radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]);
+ radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]);
+ radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]);
+ radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, sample_locs_pixel[0][1]);
+ radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, sample_locs_pixel[1][1]);
+ radeon_set_context_reg(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, sample_locs_pixel[2][1]);
+ radeon_set_context_reg(cs, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, sample_locs_pixel[3][1]);
break;
default:
unreachable("invalid number of samples");
}
static void
-radv_emit_inline_push_consts(struct radv_device *device, struct radeon_cmdbuf *cs,
- const struct radv_shader *shader, uint32_t base_reg, int idx,
- uint32_t *values)
+radv_emit_inline_push_consts(struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_shader *shader,
+ uint32_t base_reg, int idx, uint32_t *values)
{
const struct radv_userdata_info *loc = &shader->info.user_sgprs_locs.shader_data[idx];
const unsigned pipe_count = MAX2(rb_count, pdevice->rad_info.num_tcc_blocks);
const unsigned db_tag_part = (db_tag_count * rb_count / pipe_count) * db_tag_size * pipe_count;
- const unsigned color_tag_part =
- (color_tag_count * rb_count / pipe_count) * color_tag_size * pipe_count;
- const unsigned fmask_tag_part =
- (fmask_tag_count * rb_count / pipe_count) * fmask_tag_size * pipe_count;
+ const unsigned color_tag_part = (color_tag_count * rb_count / pipe_count) * color_tag_size * pipe_count;
+ const unsigned fmask_tag_part = (fmask_tag_count * rb_count / pipe_count) * fmask_tag_size * pipe_count;
const unsigned total_samples = radv_get_rasterization_samples(cmd_buffer);
const unsigned samples_log = util_logbase2_ceil(total_samples);
if (fmask_bytes_per_pixel) {
const unsigned fmask_pixel_count_log = util_logbase2(fmask_tag_part / fmask_bytes_per_pixel);
- const VkExtent2D fmask_extent =
- (VkExtent2D){.width = 1ull << ((fmask_pixel_count_log + 1) / 2),
- .height = 1ull << (color_pixel_count_log / 2)};
+ const VkExtent2D fmask_extent = (VkExtent2D){.width = 1ull << ((fmask_pixel_count_log + 1) / 2),
+ .height = 1ull << (color_pixel_count_log / 2)};
if (fmask_extent.width * fmask_extent.height < extent.width * extent.height)
extent = fmask_extent;
const unsigned db_pixel_count_log = util_logbase2(db_tag_part / db_bytes_per_pixel);
- const VkExtent2D db_extent = (VkExtent2D){.width = 1ull << ((db_pixel_count_log + 1) / 2),
- .height = 1ull << (color_pixel_count_log / 2)};
+ const VkExtent2D db_extent =
+ (VkExtent2D){.width = 1ull << ((db_pixel_count_log + 1) / 2), .height = 1ull << (color_pixel_count_log / 2)};
if (db_extent.width * db_extent.height < extent.width * extent.height)
extent = db_extent;
VkExtent2D extent = {512, 512};
- unsigned log_num_rb_per_se =
- util_logbase2_ceil(pdevice->rad_info.max_render_backends / pdevice->rad_info.max_se);
+ unsigned log_num_rb_per_se = util_logbase2_ceil(pdevice->rad_info.max_render_backends / pdevice->rad_info.max_se);
unsigned log_num_se = util_logbase2_ceil(pdevice->rad_info.max_se);
unsigned total_samples = radv_get_rasterization_samples(cmd_buffer);
min_bytes_per_pixel = bytes;
}
- pa_sc_binner_cntl_0 =
- S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_NEW_SC) | S_028C44_BIN_SIZE_X(0) |
- S_028C44_BIN_SIZE_Y(0) | S_028C44_BIN_SIZE_X_EXTEND(2) | /* 128 */
- S_028C44_BIN_SIZE_Y_EXTEND(min_bytes_per_pixel <= 4 ? 2 : 1) | /* 128 or 64 */
- S_028C44_DISABLE_START_OF_PRIM(1) |
- S_028C44_FLUSH_ON_BINNING_TRANSITION(1);
+ pa_sc_binner_cntl_0 = S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_NEW_SC) | S_028C44_BIN_SIZE_X(0) |
+ S_028C44_BIN_SIZE_Y(0) | S_028C44_BIN_SIZE_X_EXTEND(2) | /* 128 */
+ S_028C44_BIN_SIZE_Y_EXTEND(min_bytes_per_pixel <= 4 ? 2 : 1) | /* 128 or 64 */
+ S_028C44_DISABLE_START_OF_PRIM(1) | S_028C44_FLUSH_ON_BINNING_TRANSITION(1);
} else {
pa_sc_binner_cntl_0 = S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
S_028C44_DISABLE_START_OF_PRIM(1) |
if (device->pbb_allowed && bin_size.width && bin_size.height) {
struct radv_binning_settings *settings = &device->physical_device->binning_settings;
- pa_sc_binner_cntl_0 = S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
- S_028C44_BIN_SIZE_X(bin_size.width == 16) |
- S_028C44_BIN_SIZE_Y(bin_size.height == 16) |
- S_028C44_BIN_SIZE_X_EXTEND(util_logbase2(MAX2(bin_size.width, 32)) - 5) |
- S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) |
- S_028C44_CONTEXT_STATES_PER_BIN(settings->context_states_per_bin - 1) |
- S_028C44_PERSISTENT_STATES_PER_BIN(settings->persistent_states_per_bin - 1) |
- S_028C44_DISABLE_START_OF_PRIM(1) |
- S_028C44_FPOVS_PER_BATCH(settings->fpovs_per_batch) |
- S_028C44_OPTIMAL_BIN_SELECTION(1) |
- S_028C44_FLUSH_ON_BINNING_TRANSITION(device->physical_device->rad_info.family == CHIP_VEGA12 ||
- device->physical_device->rad_info.family == CHIP_VEGA20 ||
- device->physical_device->rad_info.family >= CHIP_RAVEN2);
+ pa_sc_binner_cntl_0 =
+ S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) | S_028C44_BIN_SIZE_X(bin_size.width == 16) |
+ S_028C44_BIN_SIZE_Y(bin_size.height == 16) |
+ S_028C44_BIN_SIZE_X_EXTEND(util_logbase2(MAX2(bin_size.width, 32)) - 5) |
+ S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) |
+ S_028C44_CONTEXT_STATES_PER_BIN(settings->context_states_per_bin - 1) |
+ S_028C44_PERSISTENT_STATES_PER_BIN(settings->persistent_states_per_bin - 1) |
+ S_028C44_DISABLE_START_OF_PRIM(1) | S_028C44_FPOVS_PER_BATCH(settings->fpovs_per_batch) |
+ S_028C44_OPTIMAL_BIN_SELECTION(1) |
+ S_028C44_FLUSH_ON_BINNING_TRANSITION(device->physical_device->rad_info.family == CHIP_VEGA12 ||
+ device->physical_device->rad_info.family == CHIP_VEGA20 ||
+ device->physical_device->rad_info.family >= CHIP_RAVEN2);
} else {
pa_sc_binner_cntl_0 = radv_get_disabled_binning_state(cmd_buffer);
}
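/* Worked example of the encoding above: a 64x32 bin sets BIN_SIZE_X/Y to 0
 * (the width/height are not 16) and the extend fields to log2(64) - 5 = 1 and
 * log2(32) - 5 = 0; a 16-wide bin instead sets BIN_SIZE_X = 1, and the
 * MAX2(..., 32) clamp suggests the extend field is not consulted in that case. */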
case V_028C70_COLOR_8_8:
case V_028C70_COLOR_8_8_8_8:
/* For 1 and 2-channel formats, use the superset thereof. */
- if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
- spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
case V_028C70_COLOR_16:
case V_028C70_COLOR_16_16:
/* For 1-channel formats, use the superset thereof. */
- if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
- spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
- spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
- spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
+ if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_UINT16_ABGR || spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
if (swap == V_028C70_SWAP_STD || swap == V_028C70_SWAP_STD_REV)
sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
else
assert((ps_epilog->va >> 32) == cmd_buffer->device->physical_device->rad_info.address32_hi);
- struct radv_userdata_info *loc =
- &ps_shader->info.user_sgprs_locs.shader_data[AC_UD_PS_EPILOG_PC];
+ struct radv_userdata_info *loc = &ps_shader->info.user_sgprs_locs.shader_data[AC_UD_PS_EPILOG_PC];
uint32_t base_reg = ps_shader->info.user_data_0;
assert(loc->sgpr_idx != -1);
assert(loc->num_sgprs == 1);
- radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4,
- ps_epilog->va, false);
+ radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, ps_epilog->va, false);
cmd_buffer->shader_upload_seq = MAX2(cmd_buffer->shader_upload_seq, ps_epilog->upload_seq);
return;
if (cmd_buffer->state.emitted_graphics_pipeline) {
- if (radv_rast_prim_is_points_or_lines(cmd_buffer->state.emitted_graphics_pipeline->rast_prim) != radv_rast_prim_is_points_or_lines(pipeline->rast_prim))
+ if (radv_rast_prim_is_points_or_lines(cmd_buffer->state.emitted_graphics_pipeline->rast_prim) !=
+ radv_rast_prim_is_points_or_lines(pipeline->rast_prim))
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_GUARDBAND;
if (cmd_buffer->state.emitted_graphics_pipeline->custom_blend_mode != pipeline->custom_blend_mode)
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP |
- RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP_ENABLE;
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP | RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP_ENABLE;
if (cmd_buffer->state.emitted_graphics_pipeline->ms.min_sample_shading != pipeline->ms.min_sample_shading ||
cmd_buffer->state.emitted_graphics_pipeline->uses_out_of_order_rast != pipeline->uses_out_of_order_rast ||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE;
}
- if (cmd_buffer->state.emitted_graphics_pipeline->db_shader_control !=
- pipeline->db_shader_control)
+ if (cmd_buffer->state.emitted_graphics_pipeline->db_shader_control != pipeline->db_shader_control)
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE;
if (cmd_buffer->state.emitted_graphics_pipeline->db_render_control != pipeline->db_render_control)
if ((!cmd_buffer->state.emitted_graphics_pipeline ||
cmd_buffer->state.emitted_graphics_pipeline->base.shaders[MESA_SHADER_FRAGMENT] !=
- cmd_buffer->state.graphics_pipeline->base.shaders[MESA_SHADER_FRAGMENT]) &&
+ cmd_buffer->state.graphics_pipeline->base.shaders[MESA_SHADER_FRAGMENT]) &&
(settings->context_states_per_bin > 1 || settings->persistent_states_per_bin > 1)) {
/* Break the batch on PS changes. */
radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
}
if (cmd_buffer->state.gs_copy_shader) {
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
- cmd_buffer->state.gs_copy_shader->bo);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->state.gs_copy_shader->bo);
}
if (unlikely(cmd_buffer->device->trace_bo))
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
return d->vk.rs.depth_clip_enable == VK_MESA_DEPTH_CLIP_ENABLE_TRUE ||
- (d->vk.rs.depth_clip_enable == VK_MESA_DEPTH_CLIP_ENABLE_NOT_CLAMP &&
- !d->vk.rs.depth_clamp_enable);
+ (d->vk.rs.depth_clip_enable == VK_MESA_DEPTH_CLIP_ENABLE_NOT_CLAMP && !d->vk.rs.depth_clamp_enable);
}
static enum radv_depth_clamp_mode
/* For optimal performance, depth clamping should always be enabled except if the application
* disables clamping explicitly or uses depth values outside of the [0.0, 1.0] range.
*/
- if (!depth_clip_enable ||
- device->vk.enabled_extensions.EXT_depth_range_unrestricted) {
+ if (!depth_clip_enable || device->vk.enabled_extensions.EXT_depth_range_unrestricted) {
mode = RADV_DEPTH_CLAMP_MODE_DISABLED;
} else {
mode = RADV_DEPTH_CLAMP_MODE_ZERO_TO_ONE;
enum radv_depth_clamp_mode depth_clamp_mode = radv_get_depth_clamp_mode(cmd_buffer);
assert(d->vk.vp.viewport_count);
- radeon_set_context_reg_seq(cmd_buffer->cs, R_02843C_PA_CL_VPORT_XSCALE,
- d->vk.vp.viewport_count * 6);
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_02843C_PA_CL_VPORT_XSCALE, d->vk.vp.viewport_count * 6);
for (unsigned i = 0; i < d->vk.vp.viewport_count; i++) {
radeon_emit(cmd_buffer->cs, fui(d->hw_vp.xform[i].scale[0]));
radeon_emit(cmd_buffer->cs, fui(translate_z));
}
- radeon_set_context_reg_seq(cmd_buffer->cs, R_0282D0_PA_SC_VPORT_ZMIN_0,
- d->vk.vp.viewport_count * 2);
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_0282D0_PA_SC_VPORT_ZMIN_0, d->vk.vp.viewport_count * 2);
for (unsigned i = 0; i < d->vk.vp.viewport_count; i++) {
float zmin, zmax;
cliprect_rule |= 1u << i;
}
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028210_PA_SC_CLIPRECT_0_TL,
- d->vk.dr.rectangle_count * 2);
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028210_PA_SC_CLIPRECT_0_TL, d->vk.dr.rectangle_count * 2);
for (unsigned i = 0; i < d->vk.dr.rectangle_count; ++i) {
VkRect2D rect = d->vk.dr.rectangles[i];
radeon_emit(cmd_buffer->cs, S_028210_TL_X(rect.offset.x) | S_028210_TL_Y(rect.offset.y));
unsigned slope = fui(d->vk.rs.depth_bias.slope * 16.0f);
radeon_set_context_reg_seq(cmd_buffer->cs, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 5);
- radeon_emit(cmd_buffer->cs, fui(d->vk.rs.depth_bias.clamp)); /* CLAMP */
- radeon_emit(cmd_buffer->cs, slope); /* FRONT SCALE */
+ radeon_emit(cmd_buffer->cs, fui(d->vk.rs.depth_bias.clamp)); /* CLAMP */
+ radeon_emit(cmd_buffer->cs, slope); /* FRONT SCALE */
radeon_emit(cmd_buffer->cs, fui(d->vk.rs.depth_bias.constant)); /* FRONT OFFSET */
- radeon_emit(cmd_buffer->cs, slope); /* BACK SCALE */
+ radeon_emit(cmd_buffer->cs, slope); /* BACK SCALE */
radeon_emit(cmd_buffer->cs, fui(d->vk.rs.depth_bias.constant)); /* BACK OFFSET */
}
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
unsigned pa_su_sc_mode_cntl;
- pa_su_sc_mode_cntl = S_028814_CULL_FRONT(!!(d->vk.rs.cull_mode & VK_CULL_MODE_FRONT_BIT)) |
- S_028814_CULL_BACK(!!(d->vk.rs.cull_mode & VK_CULL_MODE_BACK_BIT)) |
- S_028814_FACE(d->vk.rs.front_face) |
- S_028814_POLY_OFFSET_FRONT_ENABLE(d->vk.rs.depth_bias.enable) |
- S_028814_POLY_OFFSET_BACK_ENABLE(d->vk.rs.depth_bias.enable) |
- S_028814_POLY_OFFSET_PARA_ENABLE(d->vk.rs.depth_bias.enable) |
- S_028814_POLY_MODE(d->vk.rs.polygon_mode != V_028814_X_DRAW_TRIANGLES) |
- S_028814_POLYMODE_FRONT_PTYPE(d->vk.rs.polygon_mode) |
- S_028814_POLYMODE_BACK_PTYPE(d->vk.rs.polygon_mode) |
- S_028814_PROVOKING_VTX_LAST(d->vk.rs.provoking_vertex ==
- VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT);
+ pa_su_sc_mode_cntl =
+ S_028814_CULL_FRONT(!!(d->vk.rs.cull_mode & VK_CULL_MODE_FRONT_BIT)) |
+ S_028814_CULL_BACK(!!(d->vk.rs.cull_mode & VK_CULL_MODE_BACK_BIT)) | S_028814_FACE(d->vk.rs.front_face) |
+ S_028814_POLY_OFFSET_FRONT_ENABLE(d->vk.rs.depth_bias.enable) |
+ S_028814_POLY_OFFSET_BACK_ENABLE(d->vk.rs.depth_bias.enable) |
+ S_028814_POLY_OFFSET_PARA_ENABLE(d->vk.rs.depth_bias.enable) |
+ S_028814_POLY_MODE(d->vk.rs.polygon_mode != V_028814_X_DRAW_TRIANGLES) |
+ S_028814_POLYMODE_FRONT_PTYPE(d->vk.rs.polygon_mode) | S_028814_POLYMODE_BACK_PTYPE(d->vk.rs.polygon_mode) |
+ S_028814_PROVOKING_VTX_LAST(d->vk.rs.provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT);
if (gfx_level >= GFX10) {
/* Ensure that SC processes the primitive group in the same order as PA produced them. Needed
* when either POLY_MODE or PERPENDICULAR_ENDCAP_ENA is set.
*/
- pa_su_sc_mode_cntl |= S_028814_KEEP_TOGETHER_ENABLE(
- d->vk.rs.polygon_mode != V_028814_X_DRAW_TRIANGLES ||
- d->vk.rs.line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT);
+ pa_su_sc_mode_cntl |=
+ S_028814_KEEP_TOGETHER_ENABLE(d->vk.rs.polygon_mode != V_028814_X_DRAW_TRIANGLES ||
+ d->vk.rs.line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT);
}
return pa_su_sc_mode_cntl;
const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader;
const unsigned stage = last_vgt_shader->info.stage;
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- const struct radv_userdata_info *loc =
- radv_get_user_sgpr(last_vgt_shader, AC_UD_NGG_PROVOKING_VTX);
+ const struct radv_userdata_info *loc = radv_get_user_sgpr(last_vgt_shader, AC_UD_NGG_PROVOKING_VTX);
unsigned provoking_vtx = 0;
uint32_t base_reg;
if (d->vk.rs.provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT) {
if (stage == MESA_SHADER_VERTEX) {
- provoking_vtx = si_conv_prim_to_gs_out(d->vk.ia.primitive_topology,
- last_vgt_shader->info.is_ngg);
+ provoking_vtx = si_conv_prim_to_gs_out(d->vk.ia.primitive_topology, last_vgt_shader->info.is_ngg);
} else {
assert(stage == MESA_SHADER_GEOMETRY);
provoking_vtx = last_vgt_shader->info.gs.vertices_in - 1;
radv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer)
{
const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader;
- const struct radv_userdata_info *loc =
- radv_get_user_sgpr(last_vgt_shader, AC_UD_NUM_VERTS_PER_PRIM);
+ const struct radv_userdata_info *loc = radv_get_user_sgpr(last_vgt_shader, AC_UD_NUM_VERTS_PER_PRIM);
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
uint32_t base_reg;
assert(!cmd_buffer->state.mesh_shading);
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) {
- radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cmd_buffer->cs,
- R_030908_VGT_PRIMITIVE_TYPE, 1, d->vk.ia.primitive_topology);
+ radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cmd_buffer->cs, R_030908_VGT_PRIMITIVE_TYPE, 1,
+ d->vk.ia.primitive_topology);
} else {
- radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE,
- d->vk.ia.primitive_topology);
+ radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE, d->vk.ia.primitive_topology);
}
if (loc->sgpr_idx == -1)
base_reg = last_vgt_shader->info.user_data_0;
radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4,
- si_conv_prim_to_gs_out(d->vk.ia.primitive_topology,
- last_vgt_shader->info.is_ngg) + 1);
+ si_conv_prim_to_gs_out(d->vk.ia.primitive_topology, last_vgt_shader->info.is_ngg) + 1);
}
static void
{
struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- radeon_set_context_reg(
- cmd_buffer->cs, R_028800_DB_DEPTH_CONTROL,
- S_028800_Z_ENABLE(d->vk.ds.depth.test_enable ? 1 : 0) |
- S_028800_Z_WRITE_ENABLE(d->vk.ds.depth.write_enable ? 1 : 0) |
- S_028800_ZFUNC(d->vk.ds.depth.compare_op) |
- S_028800_DEPTH_BOUNDS_ENABLE(d->vk.ds.depth.bounds_test.enable ? 1 : 0) |
- S_028800_STENCIL_ENABLE(d->vk.ds.stencil.test_enable ? 1 : 0) |
- S_028800_BACKFACE_ENABLE(d->vk.ds.stencil.test_enable ? 1 : 0) |
- S_028800_STENCILFUNC(d->vk.ds.stencil.front.op.compare) |
- S_028800_STENCILFUNC_BF(d->vk.ds.stencil.back.op.compare));
+ radeon_set_context_reg(cmd_buffer->cs, R_028800_DB_DEPTH_CONTROL,
+ S_028800_Z_ENABLE(d->vk.ds.depth.test_enable ? 1 : 0) |
+ S_028800_Z_WRITE_ENABLE(d->vk.ds.depth.write_enable ? 1 : 0) |
+ S_028800_ZFUNC(d->vk.ds.depth.compare_op) |
+ S_028800_DEPTH_BOUNDS_ENABLE(d->vk.ds.depth.bounds_test.enable ? 1 : 0) |
+ S_028800_STENCIL_ENABLE(d->vk.ds.stencil.test_enable ? 1 : 0) |
+ S_028800_BACKFACE_ENABLE(d->vk.ds.stencil.test_enable ? 1 : 0) |
+ S_028800_STENCILFUNC(d->vk.ds.stencil.front.op.compare) |
+ S_028800_STENCILFUNC_BF(d->vk.ds.stencil.back.op.compare));
}
static void
{
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- radeon_set_context_reg(
- cmd_buffer->cs, R_02842C_DB_STENCIL_CONTROL,
- S_02842C_STENCILFAIL(si_translate_stencil_op(d->vk.ds.stencil.front.op.fail)) |
- S_02842C_STENCILZPASS(si_translate_stencil_op(d->vk.ds.stencil.front.op.pass)) |
- S_02842C_STENCILZFAIL(si_translate_stencil_op(d->vk.ds.stencil.front.op.depth_fail)) |
- S_02842C_STENCILFAIL_BF(si_translate_stencil_op(d->vk.ds.stencil.back.op.fail)) |
- S_02842C_STENCILZPASS_BF(si_translate_stencil_op(d->vk.ds.stencil.back.op.pass)) |
- S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(d->vk.ds.stencil.back.op.depth_fail)));
+ radeon_set_context_reg(cmd_buffer->cs, R_02842C_DB_STENCIL_CONTROL,
+ S_02842C_STENCILFAIL(si_translate_stencil_op(d->vk.ds.stencil.front.op.fail)) |
+ S_02842C_STENCILZPASS(si_translate_stencil_op(d->vk.ds.stencil.front.op.pass)) |
+ S_02842C_STENCILZFAIL(si_translate_stencil_op(d->vk.ds.stencil.front.op.depth_fail)) |
+ S_02842C_STENCILFAIL_BF(si_translate_stencil_op(d->vk.ds.stencil.back.op.fail)) |
+ S_02842C_STENCILZPASS_BF(si_translate_stencil_op(d->vk.ds.stencil.back.op.pass)) |
+ S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(d->vk.ds.stencil.back.op.depth_fail)));
}
static bool
const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT];
return pdevice->rad_info.gfx_level >= GFX10_3 &&
- (cmd_buffer->state.ms.sample_shading_enable || (ps && ps->info.ps.reads_sample_mask_in &&
- !ps->info.ps.needs_poly_line_smooth));
+ (cmd_buffer->state.ms.sample_shading_enable ||
+ (ps && ps->info.ps.reads_sample_mask_in && !ps->info.ps.needs_poly_line_smooth));
}
static void
/* When per-vertex VRS is forced and the dynamic fragment shading rate is a no-op, ignore
* it. This is needed for vkd3d-proton because it always declares per-draw VRS as dynamic.
*/
- if (cmd_buffer->device->force_vrs != RADV_FORCE_VRS_1x1 &&
- d->vk.fsr.fragment_size.width == 1 && d->vk.fsr.fragment_size.height == 1 &&
+ if (cmd_buffer->device->force_vrs != RADV_FORCE_VRS_1x1 && d->vk.fsr.fragment_size.width == 1 &&
+ d->vk.fsr.fragment_size.height == 1 &&
d->vk.fsr.combiner_ops[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR &&
d->vk.fsr.combiner_ops[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR)
return;
}
/* Emit per-draw VRS rate which is the first combiner. */
- radeon_set_uconfig_reg(cmd_buffer->cs, R_03098C_GE_VRS_RATE,
- S_03098C_RATE_X(rate_x) | S_03098C_RATE_Y(rate_y));
+ radeon_set_uconfig_reg(cmd_buffer->cs, R_03098C_GE_VRS_RATE, S_03098C_RATE_X(rate_x) | S_03098C_RATE_Y(rate_y));
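/* Encoding note (as understood here, not a hardware-doc quote): RATE_X and
 * RATE_Y take the log2 of the per-draw fragment size, so a 2x2 rate is
 * emitted as RATE_X(1) | RATE_Y(1), and the dynamic 1x1 no-op case handled
 * above corresponds to RATE_X(0) | RATE_Y(0).
 */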
/* Disable VRS and use the rates from PS_ITER_SAMPLES if:
*
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
bool depth_clip_enable = radv_get_depth_clip_enable(cmd_buffer);
- radeon_set_context_reg(cmd_buffer->cs, R_028810_PA_CL_CLIP_CNTL,
- S_028810_DX_RASTERIZATION_KILL(d->vk.rs.rasterizer_discard_enable) |
- S_028810_ZCLIP_NEAR_DISABLE(!depth_clip_enable) |
- S_028810_ZCLIP_FAR_DISABLE(!depth_clip_enable) |
- S_028810_DX_CLIP_SPACE_DEF(!d->vk.vp.depth_clip_negative_one_to_one) |
- S_028810_DX_LINEAR_ATTR_CLIP_ENA(1));
+ radeon_set_context_reg(
+ cmd_buffer->cs, R_028810_PA_CL_CLIP_CNTL,
+ S_028810_DX_RASTERIZATION_KILL(d->vk.rs.rasterizer_discard_enable) |
+ S_028810_ZCLIP_NEAR_DISABLE(!depth_clip_enable) | S_028810_ZCLIP_FAR_DISABLE(!depth_clip_enable) |
+ S_028810_DX_CLIP_SPACE_DEF(!d->vk.vp.depth_clip_negative_one_to_one) | S_028810_DX_LINEAR_ATTR_CLIP_ENA(1));
}
static bool
/* RB+ doesn't work with dual source blending, logic op and CB_RESOLVE. */
bool mrt0_is_dual_src = radv_is_mrt0_dual_src(cmd_buffer);
- cb_color_control |=
- S_028808_DISABLE_DUAL_QUAD(mrt0_is_dual_src || d->vk.cb.logic_op_enable ||
- cmd_buffer->state.custom_blend_mode == V_028808_CB_RESOLVE);
+ cb_color_control |= S_028808_DISABLE_DUAL_QUAD(mrt0_is_dual_src || d->vk.cb.logic_op_enable ||
+ cmd_buffer->state.custom_blend_mode == V_028808_CB_RESOLVE);
}
if (cmd_buffer->state.custom_blend_mode) {
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
uint32_t color_write_enable = 0, color_write_mask = 0;
- u_foreach_bit(i, d->vk.cb.color_write_enables) {
+ u_foreach_bit (i, d->vk.cb.color_write_enables) {
color_write_enable |= 0xfu << (i * 4);
}
radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
}
- radeon_set_context_reg(cmd_buffer->cs, R_028238_CB_TARGET_MASK,
- color_write_mask & color_write_enable);
+ radeon_set_context_reg(cmd_buffer->cs, R_028238_CB_TARGET_MASK, color_write_mask & color_write_enable);
}
static void
if (cmd_buffer->state.graphics_pipeline->dynamic_states & RADV_DYNAMIC_PATCH_CONTROL_POINTS) {
/* Compute the number of patches. */
cmd_buffer->state.tess_num_patches = get_tcs_num_patches(
- d->vk.ts.patch_control_points, tcs->info.tcs.tcs_vertices_out,
- tcs->info.tcs.num_linked_inputs, tcs->info.tcs.num_linked_outputs,
- tcs->info.tcs.num_linked_patch_outputs, pdevice->hs.tess_offchip_block_dw_size,
- pdevice->rad_info.gfx_level, pdevice->rad_info.family);
+ d->vk.ts.patch_control_points, tcs->info.tcs.tcs_vertices_out, tcs->info.tcs.num_linked_inputs,
+ tcs->info.tcs.num_linked_outputs, tcs->info.tcs.num_linked_patch_outputs,
+ pdevice->hs.tess_offchip_block_dw_size, pdevice->rad_info.gfx_level, pdevice->rad_info.family);
/* Compute the LDS size. */
cmd_buffer->state.tess_lds_size = calculate_tess_lds_size(
pdevice->rad_info.gfx_level, d->vk.ts.patch_control_points, tcs->info.tcs.tcs_vertices_out,
- tcs->info.tcs.num_linked_inputs, cmd_buffer->state.tess_num_patches,
- tcs->info.tcs.num_linked_outputs, tcs->info.tcs.num_linked_patch_outputs);
+ tcs->info.tcs.num_linked_inputs, cmd_buffer->state.tess_num_patches, tcs->info.tcs.num_linked_outputs,
+ tcs->info.tcs.num_linked_patch_outputs);
}
ls_hs_config = S_028B58_NUM_PATCHES(cmd_buffer->state.tess_num_patches) |
base_reg = cmd_buffer->state.shaders[MESA_SHADER_TESS_CTRL]->info.user_data_0;
radeon_set_sh_reg(cmd_buffer->cs, base_reg + offchip->sgpr_idx * 4, tcs_offchip_layout);
- const struct radv_userdata_info *num_patches = radv_get_user_sgpr(
- radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_TESS_EVAL), AC_UD_TES_NUM_PATCHES);
+ const struct radv_userdata_info *num_patches =
+ radv_get_user_sgpr(radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_TESS_EVAL), AC_UD_TES_NUM_PATCHES);
assert(num_patches->sgpr_idx != -1 && num_patches->num_sgprs == 1);
const struct radv_shader *tes = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_TESS_EVAL);
base_reg = tes->info.user_data_0;
- radeon_set_sh_reg(cmd_buffer->cs, base_reg + num_patches->sgpr_idx * 4,
- cmd_buffer->state.tess_num_patches);
+ radeon_set_sh_reg(cmd_buffer->cs, base_reg + num_patches->sgpr_idx * 4, cmd_buffer->state.tess_num_patches);
}
static void
const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT];
const bool uses_inner_coverage = ps && ps->info.ps.reads_fully_covered;
- pa_sc_conservative_rast = S_028C4C_PREZ_AA_MASK_ENABLE(1) | S_028C4C_POSTZ_AA_MASK_ENABLE(1) |
- S_028C4C_CENTROID_SAMPLE_OVERRIDE(1);
+ pa_sc_conservative_rast =
+ S_028C4C_PREZ_AA_MASK_ENABLE(1) | S_028C4C_POSTZ_AA_MASK_ENABLE(1) | S_028C4C_CENTROID_SAMPLE_OVERRIDE(1);
/* Inner coverage requires underestimate conservative rasterization. */
if (d->vk.rs.conservative_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT &&
!uses_inner_coverage) {
- pa_sc_conservative_rast |= S_028C4C_OVER_RAST_ENABLE(1) |
- S_028C4C_UNDER_RAST_SAMPLE_SELECT(1) |
+ pa_sc_conservative_rast |= S_028C4C_OVER_RAST_ENABLE(1) | S_028C4C_UNDER_RAST_SAMPLE_SELECT(1) |
S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(1);
} else {
- pa_sc_conservative_rast |=
- S_028C4C_OVER_RAST_SAMPLE_SELECT(1) | S_028C4C_UNDER_RAST_ENABLE(1);
+ pa_sc_conservative_rast |= S_028C4C_OVER_RAST_SAMPLE_SELECT(1) | S_028C4C_UNDER_RAST_ENABLE(1);
}
} else {
pa_sc_conservative_rast = S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1);
}
- radeon_set_context_reg(cmd_buffer->cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
- pa_sc_conservative_rast);
+ radeon_set_context_reg(cmd_buffer->cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, pa_sc_conservative_rast);
}
}
radeon_set_context_reg(cmd_buffer->cs, R_02800C_DB_RENDER_OVERRIDE,
S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
- S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) |
- S_02800C_DISABLE_VIEWPORT_CLAMP(mode == RADV_DEPTH_CLAMP_MODE_DISABLED));
+ S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) |
+ S_02800C_DISABLE_VIEWPORT_CLAMP(mode == RADV_DEPTH_CLAMP_MODE_DISABLED));
}
static void
/* This should only be set when VRS surfaces aren't enabled on GFX11; otherwise the GPU might
* hang.
*/
- S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(pdevice->rad_info.gfx_level < GFX11 ||
- !cmd_buffer->state.uses_vrs_attachment);
+ S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(pdevice->rad_info.gfx_level < GFX11 || !cmd_buffer->state.uses_vrs_attachment);
if (!d->sample_location.count)
radv_emit_default_sample_locations(cmd_buffer->cs, rasterization_samples);
}
static void
-radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index,
- struct radv_color_buffer_info *cb, struct radv_image_view *iview,
- VkImageLayout layout)
+radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, struct radv_color_buffer_info *cb,
+ struct radv_image_view *iview, VkImageLayout layout)
{
bool is_vi = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX8;
uint32_t cb_fdcc_control = cb->cb_dcc_control;
uint32_t cb_color_info = cb->cb_color_info;
struct radv_image *image = iview->image;
- if (!radv_layout_dcc_compressed(
- cmd_buffer->device, image, iview->vk.base_mip_level, layout,
- radv_image_queue_family_mask(image, cmd_buffer->qf,
- cmd_buffer->qf))) {
+ if (!radv_layout_dcc_compressed(cmd_buffer->device, image, iview->vk.base_mip_level, layout,
+ radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf))) {
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
cb_fdcc_control &= C_028C78_FDCC_ENABLE;
} else {
}
}
- const enum radv_fmask_compression fmask_comp =
- radv_layout_fmask_compression(cmd_buffer->device, image, layout,
- radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf));
+ const enum radv_fmask_compression fmask_comp = radv_layout_fmask_compression(
+ cmd_buffer->device, image, layout, radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf));
if (fmask_comp == RADV_FMASK_COMPRESSION_NONE) {
cb_color_info &= C_028C70_COMPRESSION;
}
- if (radv_image_is_tc_compat_cmask(image) && (radv_is_fmask_decompress_pipeline(cmd_buffer) ||
- radv_is_dcc_decompress_pipeline(cmd_buffer))) {
+ if (radv_image_is_tc_compat_cmask(image) &&
+ (radv_is_fmask_decompress_pipeline(cmd_buffer) || radv_is_dcc_decompress_pipeline(cmd_buffer))) {
/* If this bit is set, the FMASK decompression operation
* doesn't occur (DCC_COMPRESS also implies FMASK_DECOMPRESS).
*/
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
radeon_set_context_reg_seq(cmd_buffer->cs, R_028C6C_CB_COLOR0_VIEW + index * 0x3c, 4);
- radeon_emit(cmd_buffer->cs, cb->cb_color_view); /* CB_COLOR0_VIEW */
- radeon_emit(cmd_buffer->cs, cb->cb_color_info); /* CB_COLOR0_INFO */
- radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); /* CB_COLOR0_ATTRIB */
- radeon_emit(cmd_buffer->cs, cb_fdcc_control); /* CB_COLOR0_FDCC_CONTROL */
+ radeon_emit(cmd_buffer->cs, cb->cb_color_view); /* CB_COLOR0_VIEW */
+ radeon_emit(cmd_buffer->cs, cb->cb_color_info); /* CB_COLOR0_INFO */
+ radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); /* CB_COLOR0_ATTRIB */
+ radeon_emit(cmd_buffer->cs, cb_fdcc_control); /* CB_COLOR0_FDCC_CONTROL */
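/* Pattern note: radeon_set_context_reg_seq(cs, REG, n) opens a packet that
 * writes n consecutive context registers, so it must be followed by exactly n
 * radeon_emit() calls -- here VIEW, INFO, ATTRIB and FDCC_CONTROL, which sit
 * at adjacent offsets within each MRT's 0x3c-byte register block.
 */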
radeon_set_context_reg(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, cb->cb_color_base);
radeon_set_context_reg(cmd_buffer->cs, R_028E40_CB_COLOR0_BASE_EXT + index * 4, cb->cb_color_base >> 32);
radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, cb->cb_dcc_base);
- radeon_set_context_reg(cmd_buffer->cs, R_028E40_CB_COLOR0_BASE_EXT + index * 4,
- cb->cb_color_base >> 32);
- radeon_set_context_reg(cmd_buffer->cs, R_028E60_CB_COLOR0_CMASK_BASE_EXT + index * 4,
- cb->cb_color_cmask >> 32);
- radeon_set_context_reg(cmd_buffer->cs, R_028E80_CB_COLOR0_FMASK_BASE_EXT + index * 4,
- cb->cb_color_fmask >> 32);
- radeon_set_context_reg(cmd_buffer->cs, R_028EA0_CB_COLOR0_DCC_BASE_EXT + index * 4,
- cb->cb_dcc_base >> 32);
- radeon_set_context_reg(cmd_buffer->cs, R_028EC0_CB_COLOR0_ATTRIB2 + index * 4,
- cb->cb_color_attrib2);
- radeon_set_context_reg(cmd_buffer->cs, R_028EE0_CB_COLOR0_ATTRIB3 + index * 4,
- cb->cb_color_attrib3);
+ radeon_set_context_reg(cmd_buffer->cs, R_028E40_CB_COLOR0_BASE_EXT + index * 4, cb->cb_color_base >> 32);
+ radeon_set_context_reg(cmd_buffer->cs, R_028E60_CB_COLOR0_CMASK_BASE_EXT + index * 4, cb->cb_color_cmask >> 32);
+ radeon_set_context_reg(cmd_buffer->cs, R_028E80_CB_COLOR0_FMASK_BASE_EXT + index * 4, cb->cb_color_fmask >> 32);
+ radeon_set_context_reg(cmd_buffer->cs, R_028EA0_CB_COLOR0_DCC_BASE_EXT + index * 4, cb->cb_dcc_base >> 32);
+ radeon_set_context_reg(cmd_buffer->cs, R_028EC0_CB_COLOR0_ATTRIB2 + index * 4, cb->cb_color_attrib2);
+ radeon_set_context_reg(cmd_buffer->cs, R_028EE0_CB_COLOR0_ATTRIB3 + index * 4, cb->cb_color_attrib3);
} else if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) {
radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
radeon_emit(cmd_buffer->cs, cb->cb_color_base);
radeon_emit(cmd_buffer->cs, cb->cb_dcc_base);
radeon_emit(cmd_buffer->cs, S_028C98_BASE_256B(cb->cb_dcc_base >> 32));
- radeon_set_context_reg(cmd_buffer->cs, R_0287A0_CB_MRT0_EPITCH + index * 4,
- cb->cb_mrt_epitch);
+ radeon_set_context_reg(cmd_buffer->cs, R_0287A0_CB_MRT0_EPITCH + index * 4, cb->cb_mrt_epitch);
} else {
radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
radeon_emit(cmd_buffer->cs, cb->cb_color_base);
radeon_emit(cmd_buffer->cs, cb->cb_color_fmask_slice);
if (is_vi) { /* DCC BASE */
- radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c,
- cb->cb_dcc_base);
+ radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, cb->cb_dcc_base);
}
}
- if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11
- ? G_028C78_FDCC_ENABLE(cb_fdcc_control)
- : G_028C70_DCC_ENABLE(cb_color_info)) {
+ if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11 ? G_028C78_FDCC_ENABLE(cb_fdcc_control)
+ : G_028C70_DCC_ENABLE(cb_color_info)) {
/* Drawing with DCC enabled also compresses colorbuffers. */
VkImageSubresourceRange range = {
.aspectMask = iview->vk.aspects,
static void
radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds,
- const struct radv_image_view *iview, VkImageLayout layout,
- bool requires_cond_exec)
+ const struct radv_image_view *iview, VkImageLayout layout, bool requires_cond_exec)
{
const struct radv_image *image = iview->image;
uint32_t db_z_info = ds->db_z_info;
uint32_t db_z_info_reg;
- if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug ||
- !radv_image_is_tc_compat_htile(image))
+ if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug || !radv_image_is_tc_compat_htile(image))
return;
- if (!radv_layout_is_htile_compressed(
- cmd_buffer->device, image, layout,
- radv_image_queue_family_mask(image, cmd_buffer->qf,
- cmd_buffer->qf))) {
+ if (!radv_layout_is_htile_compressed(cmd_buffer->device, image, layout,
+ radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf))) {
db_z_info &= C_028040_TILE_SURFACE_ENABLE;
}
}
static void
-radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds,
- struct radv_image_view *iview, VkImageLayout layout)
+radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds, struct radv_image_view *iview,
+ VkImageLayout layout)
{
const struct radv_image *image = iview->image;
uint32_t db_z_info = ds->db_z_info;
uint32_t db_htile_surface = ds->db_htile_surface;
uint32_t db_render_control = ds->db_render_control | cmd_buffer->state.db_render_control;
- if (!radv_layout_is_htile_compressed(
- cmd_buffer->device, image, layout,
- radv_image_queue_family_mask(image, cmd_buffer->qf,
- cmd_buffer->qf))) {
+ if (!radv_layout_is_htile_compressed(cmd_buffer->device, image, layout,
+ radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf))) {
db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(1) | S_028000_STENCIL_COMPRESS_DISABLE(1);
}
- if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX10_3 &&
- !cmd_buffer->state.render.vrs_att.iview) {
+ if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX10_3 && !cmd_buffer->state.render.vrs_att.iview) {
db_htile_surface &= C_028ABC_VRS_HTILE_ENCODING;
}
radeon_emit(cmd_buffer->cs, ds->db_depth_size);
radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 10);
- radeon_emit(cmd_buffer->cs, db_z_info); /* DB_Z_INFO */
- radeon_emit(cmd_buffer->cs, db_stencil_info); /* DB_STENCIL_INFO */
- radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* DB_Z_READ_BASE */
- radeon_emit(cmd_buffer->cs,
- S_028044_BASE_HI(ds->db_z_read_base >> 32)); /* DB_Z_READ_BASE_HI */
- radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* DB_STENCIL_READ_BASE */
- radeon_emit(cmd_buffer->cs,
- S_02804C_BASE_HI(ds->db_stencil_read_base >> 32)); /* DB_STENCIL_READ_BASE_HI */
- radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* DB_Z_WRITE_BASE */
- radeon_emit(cmd_buffer->cs,
- S_028054_BASE_HI(ds->db_z_write_base >> 32)); /* DB_Z_WRITE_BASE_HI */
- radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* DB_STENCIL_WRITE_BASE */
- radeon_emit(cmd_buffer->cs,
- S_02805C_BASE_HI(ds->db_stencil_write_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */
+ radeon_emit(cmd_buffer->cs, db_z_info); /* DB_Z_INFO */
+ radeon_emit(cmd_buffer->cs, db_stencil_info); /* DB_STENCIL_INFO */
+ radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* DB_Z_READ_BASE */
+ radeon_emit(cmd_buffer->cs, S_028044_BASE_HI(ds->db_z_read_base >> 32)); /* DB_Z_READ_BASE_HI */
+ radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* DB_STENCIL_READ_BASE */
+ radeon_emit(cmd_buffer->cs, S_02804C_BASE_HI(ds->db_stencil_read_base >> 32)); /* DB_STENCIL_READ_BASE_HI */
+ radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* DB_Z_WRITE_BASE */
+ radeon_emit(cmd_buffer->cs, S_028054_BASE_HI(ds->db_z_write_base >> 32)); /* DB_Z_WRITE_BASE_HI */
+ radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* DB_STENCIL_WRITE_BASE */
+ radeon_emit(cmd_buffer->cs, S_02805C_BASE_HI(ds->db_stencil_write_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */
radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_INFO2, 2);
radeon_emit(cmd_buffer->cs, ds->db_z_info2);
/* Update the ZRANGE_PRECISION value for the TC-compat bug. */
radv_update_zrange_precision(cmd_buffer, ds, iview, layout, true);
- radeon_set_context_reg(cmd_buffer->cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
- ds->pa_su_poly_offset_db_fmt_cntl);
+ radeon_set_context_reg(cmd_buffer->cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, ds->pa_su_poly_offset_db_fmt_cntl);
}
static void
radeon_set_context_reg_seq(cmd_buffer->cs, R_028040_DB_Z_INFO, 2);
}
- radeon_emit(cmd_buffer->cs,
- S_028040_FORMAT(V_028040_Z_INVALID) | S_028040_NUM_SAMPLES(num_samples));
+ radeon_emit(cmd_buffer->cs, S_028040_FORMAT(V_028040_Z_INVALID) | S_028040_NUM_SAMPLES(num_samples));
radeon_emit(cmd_buffer->cs, S_028044_FORMAT(V_028044_STENCIL_INVALID));
radeon_set_context_reg(cmd_buffer->cs, R_028000_DB_RENDER_CONTROL, db_render_control);
radeon_set_context_reg(cmd_buffer->cs, R_028010_DB_RENDER_OVERRIDE2,
S_028010_CENTROID_COMPUTATION_MODE(gfx_level >= GFX10_3));
-
}
/**
* Update the fast clear depth/stencil values if the image is bound as a
* depth/stencil buffer.
*/
static void
-radv_update_bound_fast_clear_ds(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
+radv_update_bound_fast_clear_ds(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects)
{
const struct radv_image *image = iview->image;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
- if (cmd_buffer->state.render.ds_att.iview == NULL ||
- cmd_buffer->state.render.ds_att.iview->image != image)
+ if (cmd_buffer->state.render.ds_att.iview == NULL || cmd_buffer->state.render.ds_att.iview->image != image)
return;
if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
*/
static void
radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
- const VkImageSubresourceRange *range,
- VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects)
+ const VkImageSubresourceRange *range, VkClearDepthStencilValue ds_clear_value,
+ VkImageAspectFlags aspects)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
}
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, cmd_buffer->state.predicating));
- radeon_emit(cs,
- S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, value);
}
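/* Shape of the WRITE_DATA packet emitted above, five dwords in total (the
 * count field in the PKT3 header is the payload size minus one, hence 3 for
 * the four dwords that follow):
 *   [0] PKT3 header   opcode, count, predication
 *   [1] control       DST_SEL = memory, WR_CONFIRM, engine = PFP
 *   [2] va            destination address, low 32 bits
 *   [3] va >> 32      destination address, high 32 bits
 *   [4] value         the dword written to memory
 */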
static void
-radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
+radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
VkClearDepthStencilValue ds_clear_value)
{
VkImageSubresourceRange range = {
* Update the clear depth/stencil values for this image.
*/
void
-radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
+radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects)
{
VkImageSubresourceRange range = {
uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
uint32_t count = 2 * level_count;
- ASSERTED unsigned cdw_max =
- radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4 + count);
+ ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4 + count);
radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
- radeon_emit(cmd_buffer->cs,
- S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
+ radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
radeon_emit(cmd_buffer->cs, va);
radeon_emit(cmd_buffer->cs, va >> 32);
assert(radv_dcc_enabled(image, range->baseMipLevel));
- ASSERTED unsigned cdw_max =
- radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4 + count);
+ ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4 + count);
radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
- radeon_emit(cmd_buffer->cs,
- S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
+ radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
radeon_emit(cmd_buffer->cs, va);
radeon_emit(cmd_buffer->cs, va >> 32);
* Update the fast clear color values if the image is bound as a color buffer.
*/
static void
-radv_update_bound_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
- int cb_idx, uint32_t color_values[2])
+radv_update_bound_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, int cb_idx,
+ uint32_t color_values[2])
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
- if (cb_idx >= cmd_buffer->state.render.color_att_count ||
- cmd_buffer->state.render.color_att[cb_idx].iview == NULL ||
+ if (cb_idx >= cmd_buffer->state.render.color_att_count || cmd_buffer->state.render.color_att[cb_idx].iview == NULL ||
cmd_buffer->state.render.color_att[cb_idx].iview->image != image)
return;
- ASSERTED unsigned cdw_max =
- radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4);
+ ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4);
radeon_set_context_reg_seq(cs, R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c, 2);
radeon_emit(cs, color_values[0]);
if (radv_image_has_clear_value(image)) {
uint64_t va = radv_image_get_fast_clear_va(image, range->baseMipLevel);
- ASSERTED unsigned cdw_max =
- radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4 + count);
+ ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4 + count);
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, cmd_buffer->state.predicating));
radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
* Update the clear color values for this image.
*/
void
-radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview, int cb_idx,
+radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview, int cb_idx,
uint32_t color_values[2])
{
struct radv_image *image = iview->image;
* Load the clear color values from the image's metadata.
*/
static void
-radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *iview,
- int cb_idx)
+radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *iview, int cb_idx)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_image *image = iview->image;
radeon_emit(cs, 2);
} else {
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) |
- COPY_DATA_COUNT_SEL);
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | COPY_DATA_COUNT_SEL);
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, reg >> 2);
if (!iview)
continue;
- if ((radv_image_has_CB_metadata(iview->image) ||
- radv_dcc_enabled(iview->image, iview->vk.base_mip_level) ||
+ if ((radv_image_has_CB_metadata(iview->image) || radv_dcc_enabled(iview->image, iview->vk.base_mip_level) ||
radv_dcc_enabled(iview->image, cmd_buffer->state.cb_mip[i])) &&
cmd_buffer->state.cb_mip[i] != iview->vk.base_mip_level)
color_mip_changed = true;
}
if (color_mip_changed) {
- cmd_buffer->state.flush_bits |=
- RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
}
}
}
if (need_color_mip_flush) {
- cmd_buffer->state.flush_bits |=
- RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
}
memset(cmd_buffer->state.cb_mip, 0, sizeof(cmd_buffer->state.cb_mip));
int i;
bool disable_constant_encode_ac01 = false;
unsigned color_invalid = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11
- ? S_028C70_FORMAT_GFX11(V_028C70_COLOR_INVALID)
- : S_028C70_FORMAT_GFX6(V_028C70_COLOR_INVALID);
+ ? S_028C70_FORMAT_GFX11(V_028C70_COLOR_INVALID)
+ : S_028C70_FORMAT_GFX6(V_028C70_COLOR_INVALID);
- ASSERTED unsigned cdw_max =
- radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 48 + MAX_RTS * 70);
+ ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 48 + MAX_RTS * 70);
for (i = 0; i < render->color_att_count; ++i) {
struct radv_image_view *iview = render->color_att[i].iview;
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, iview->image->bindings[0].bo);
assert(iview->vk.aspects & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_PLANE_0_BIT |
- VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT));
+ VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT));
if (iview->image->disjoint && iview->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
for (uint32_t plane_id = 0; plane_id < iview->image->plane_count; plane_id++) {
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
- iview->image->bindings[plane_id].bo);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, iview->image->bindings[plane_id].bo);
}
} else {
uint32_t plane_id = iview->image->disjoint ? iview->plane_id : 0;
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
- iview->image->bindings[plane_id].bo);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, iview->image->bindings[plane_id].bo);
}
radv_emit_fb_color_state(cmd_buffer, i, &render->color_att[i].cb, iview, layout);
radv_load_color_clear_metadata(cmd_buffer, iview, i);
- if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9 &&
- iview->image->dcc_sign_reinterpret) {
+ if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9 && iview->image->dcc_sign_reinterpret) {
/* Disable constant encoding with the clear value of "1" when the DCC signedness differs,
 * because the hardware will fill with "1" instead of the clear value.
*/
radv_emit_fb_ds_state(cmd_buffer, &render->ds_att.ds, iview, layout);
- if (radv_layout_is_htile_compressed(
- cmd_buffer->device, iview->image, layout,
- radv_image_queue_family_mask(iview->image, cmd_buffer->qf,
- cmd_buffer->qf))) {
+ if (radv_layout_is_htile_compressed(cmd_buffer->device, iview->image, layout,
+ radv_image_queue_family_mask(iview->image, cmd_buffer->qf, cmd_buffer->qf))) {
/* Only load the depth/stencil fast clear values when
* compressed rendering is enabled.
*/
radv_load_ds_clear_metadata(cmd_buffer, iview);
}
- } else if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX10_3 &&
- render->vrs_att.iview && radv_cmd_buffer_get_vrs_image(cmd_buffer)) {
+ } else if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX10_3 && render->vrs_att.iview &&
+ radv_cmd_buffer_get_vrs_image(cmd_buffer)) {
/* When a subpass uses a VRS attachment without binding a depth/stencil attachment, we have to
* bind our internal depth buffer that contains the VRS data as part of HTILE.
*/
}
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX8) {
- bool disable_constant_encode =
- cmd_buffer->device->physical_device->rad_info.has_dcc_constant_encode;
+ bool disable_constant_encode = cmd_buffer->device->physical_device->rad_info.has_dcc_constant_encode;
enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level;
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
- radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_FDCC_CONTROL,
- S_028424_SAMPLE_MASK_TRACKER_WATERMARK(0));
+ radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_FDCC_CONTROL, S_028424_SAMPLE_MASK_TRACKER_WATERMARK(0));
} else {
- uint8_t watermark = gfx_level >= GFX10 ? 6 : 4;
+ uint8_t watermark = gfx_level >= GFX10 ? 6 : 4;
radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_DCC_CONTROL,
S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(gfx_level <= GFX9) |
- S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) |
- S_028424_DISABLE_CONSTANT_ENCODE_AC01(disable_constant_encode_ac01) |
- S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode));
+ S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) |
+ S_028424_DISABLE_CONSTANT_ENCODE_AC01(disable_constant_encode_ac01) |
+ S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode));
}
}
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
unsigned rast_prim = radv_get_rasterization_prim(cmd_buffer);
- si_write_guardband(cmd_buffer->cs, d->vk.vp.viewport_count, d->vk.vp.viewports, rast_prim,
- d->vk.rs.polygon_mode, d->vk.rs.line.width);
+ si_write_guardband(cmd_buffer->cs, d->vk.vp.viewport_count, d->vk.vp.viewports, rast_prim, d->vk.rs.polygon_mode,
+ d->vk.rs.line.width);
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_GUARDBAND;
}
if (state->index_type < 0)
return;
- if (state->max_index_count ||
- !cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug) {
+ if (state->max_index_count || !cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug) {
radeon_emit(cs, PKT3(PKT3_INDEX_BASE, 1, 0));
radeon_emit(cs, state->index_va);
radeon_emit(cs, state->index_va >> 32);
radv_flush_occlusion_query_state(struct radv_cmd_buffer *cmd_buffer)
{
const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level;
- const bool enable_occlusion_queries = cmd_buffer->state.active_occlusion_queries ||
- cmd_buffer->state.inherited_occlusion_queries;
+ const bool enable_occlusion_queries =
+ cmd_buffer->state.active_occlusion_queries || cmd_buffer->state.inherited_occlusion_queries;
uint32_t db_count_control;
if (!enable_occlusion_queries) {
} else {
uint32_t sample_rate = util_logbase2(cmd_buffer->state.render.max_samples);
bool gfx10_perfect =
- gfx_level >= GFX10 &&
- (cmd_buffer->state.perfect_occlusion_queries_enabled ||
- cmd_buffer->state.inherited_query_control_flags & VK_QUERY_CONTROL_PRECISE_BIT);
+ gfx_level >= GFX10 && (cmd_buffer->state.perfect_occlusion_queries_enabled ||
+ cmd_buffer->state.inherited_query_control_flags & VK_QUERY_CONTROL_PRECISE_BIT);
if (gfx_level >= GFX7) {
/* Always enable PERFECT_ZPASS_COUNTS due to issues with partially
*/
/* From total number of attributes to offset. */
- static const uint16_t total_to_offset[16] = {0, 1, 4, 10, 20, 35, 56, 84,
- 120, 165, 220, 286, 364, 455, 560, 680};
+ static const uint16_t total_to_offset[16] = {0, 1, 4, 10, 20, 35, 56, 84, 120, 165, 220, 286, 364, 455, 560, 680};
unsigned start_index = total_to_offset[num_attributes - 1];
/* From number of instanced attributes to offset. This would require a different LUT depending on
static const uint8_t count_to_offset_total16[16] = {0, 16, 31, 45, 58, 70, 81, 91,
100, 108, 115, 121, 126, 130, 133, 135};
unsigned count = util_bitcount(instance_rate_inputs);
- unsigned offset_from_start_index =
- count_to_offset_total16[count - 1] - ((16 - num_attributes) * (count - 1));
+ unsigned offset_from_start_index = count_to_offset_total16[count - 1] - ((16 - num_attributes) * (count - 1));
unsigned first = ffs(instance_rate_inputs) - 1;
return start_index + offset_from_start_index + first;
}
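/* A worked instance of the lookup above (values picked for illustration):
 * num_attributes = 4 and instance_rate_inputs = 0b0110 give count = 2 and
 * first = 1, so:
 *   start_index             = total_to_offset[3]                        = 10
 *   offset_from_start_index = count_to_offset_total16[1] - (16 - 4) * 1 = 4
 *   prolog index            = 10 + 4 + 1                                = 15
 */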
static struct radv_shader_part *
-lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs_shader,
- uint32_t *nontrivial_divisors)
+lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs_shader, uint32_t *nontrivial_divisors)
{
STATIC_ASSERT(sizeof(union vs_prolog_key_header) == 4);
assert(vs_shader->info.vs.dynamic_inputs);
uint64_t vb_offset = cmd_buffer->vertex_bindings[binding].offset;
uint64_t vb_stride;
- if (pipeline->dynamic_states & (RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE |
- RADV_DYNAMIC_VERTEX_INPUT)) {
+ if (pipeline->dynamic_states & (RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE | RADV_DYNAMIC_VERTEX_INPUT)) {
vb_stride = cmd_buffer->vertex_bindings[binding].stride;
} else {
vb_stride = pipeline->binding_stride[binding];
const bool can_use_simple_input =
cmd_buffer->state.shaders[MESA_SHADER_VERTEX] &&
- cmd_buffer->state.shaders[MESA_SHADER_VERTEX]->info.is_ngg ==
- device->physical_device->use_ngg &&
- cmd_buffer->state.shaders[MESA_SHADER_VERTEX]->info.wave_size ==
- device->physical_device->ge_wave_size;
+ cmd_buffer->state.shaders[MESA_SHADER_VERTEX]->info.is_ngg == device->physical_device->use_ngg &&
+ cmd_buffer->state.shaders[MESA_SHADER_VERTEX]->info.wave_size == device->physical_device->ge_wave_size;
/* try to use a pre-compiled prolog first */
struct radv_shader_part *prolog = NULL;
- if (can_use_simple_input && (!vs_shader->info.vs.as_ls || !instance_rate_inputs) &&
- !misaligned_mask && !state->alpha_adjust_lo && !state->alpha_adjust_hi) {
+ if (can_use_simple_input && (!vs_shader->info.vs.as_ls || !instance_rate_inputs) && !misaligned_mask &&
+ !state->alpha_adjust_lo && !state->alpha_adjust_hi) {
if (!instance_rate_inputs) {
prolog = device->simple_vs_prologs[num_attributes - 1];
} else if (num_attributes <= 16 && !*nontrivial_divisors && !zero_divisors &&
uint8_t *formats = (uint8_t *)&key_words[key_size];
unsigned num_formats = 0;
- u_foreach_bit(index, misaligned_mask) formats[num_formats++] = state->formats[index];
+ u_foreach_bit (index, misaligned_mask)
+ formats[num_formats++] = state->formats[index];
while (num_formats & 0x3)
formats[num_formats++] = 0;
key_size += num_formats / 4u;
uint32_t hash = radv_hash_vs_prolog(key_words);
- if (cmd_buffer->state.emitted_vs_prolog &&
- cmd_buffer->state.emitted_vs_prolog_key_hash == hash &&
+ if (cmd_buffer->state.emitted_vs_prolog && cmd_buffer->state.emitted_vs_prolog_key_hash == hash &&
radv_cmp_vs_prolog(key_words, cmd_buffer->state.emitted_vs_prolog_key))
return cmd_buffer->state.emitted_vs_prolog;
u_rwlock_rdlock(&device->vs_prologs_lock);
- struct hash_entry *prolog_entry =
- _mesa_hash_table_search_pre_hashed(device->vs_prologs, hash, key_words);
+ struct hash_entry *prolog_entry = _mesa_hash_table_search_pre_hashed(device->vs_prologs, hash, key_words);
u_rwlock_rdunlock(&device->vs_prologs_lock);
if (!prolog_entry) {
*(inputs++) = input_va;
*(inputs++) = input_va >> 32;
- u_foreach_bit(index, nontrivial_divisors)
- {
+ u_foreach_bit (index, nontrivial_divisors) {
uint32_t div = state->divisors[index];
if (div == 0) {
*(inputs++) = 0;
input_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + inputs_offset;
}
- const struct radv_userdata_info *loc =
- &vs_shader->info.user_sgprs_locs.shader_data[AC_UD_VS_PROLOG_INPUTS];
+ const struct radv_userdata_info *loc = &vs_shader->info.user_sgprs_locs.shader_data[AC_UD_VS_PROLOG_INPUTS];
uint32_t base_reg = vs_shader->info.user_data_0;
assert(loc->sgpr_idx != -1);
assert(loc->num_sgprs == 2);
- radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4,
- input_va, true);
+ radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, input_va, true);
}
static void
radv_emit_vertex_input(struct radv_cmd_buffer *cmd_buffer)
{
- const struct radv_shader *vs_shader =
- radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_VERTEX);
+ const struct radv_shader *vs_shader = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_VERTEX);
assert(!cmd_buffer->state.mesh_shading);
return;
uint32_t nontrivial_divisors;
- struct radv_shader_part *prolog =
- lookup_vs_prolog(cmd_buffer, vs_shader, &nontrivial_divisors);
+ struct radv_shader_part *prolog = lookup_vs_prolog(cmd_buffer, vs_shader, &nontrivial_divisors);
if (!prolog) {
vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
return;
}
radeon_set_context_reg(cmd_buffer->cs, R_028B6C_VGT_TF_PARAM,
- S_028B6C_TYPE(type) | S_028B6C_PARTITIONING(partitioning) |
- S_028B6C_TOPOLOGY(topology) |
+ S_028B6C_TYPE(type) | S_028B6C_PARTITIONING(partitioning) | S_028B6C_TOPOLOGY(topology) |
S_028B6C_DISTRIBUTION_MODE(distribution_mode));
}
* First, get rid of DST in the blend factors:
* func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
*/
- si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, VK_BLEND_FACTOR_DST_COLOR,
- VK_BLEND_FACTOR_SRC_COLOR);
+ si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, VK_BLEND_FACTOR_DST_COLOR, VK_BLEND_FACTOR_SRC_COLOR);
- si_blend_remove_dst(&eqA, &srcA, &dstA, VK_BLEND_FACTOR_DST_COLOR,
- VK_BLEND_FACTOR_SRC_COLOR);
+ si_blend_remove_dst(&eqA, &srcA, &dstA, VK_BLEND_FACTOR_DST_COLOR, VK_BLEND_FACTOR_SRC_COLOR);
- si_blend_remove_dst(&eqA, &srcA, &dstA, VK_BLEND_FACTOR_DST_ALPHA,
- VK_BLEND_FACTOR_SRC_ALPHA);
+ si_blend_remove_dst(&eqA, &srcA, &dstA, VK_BLEND_FACTOR_DST_ALPHA, VK_BLEND_FACTOR_SRC_ALPHA);
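/* Worked instance of the rewrite above for the ADD case: func(src * DST_COLOR,
 * dst * ZERO) computes src * dst; after si_blend_remove_dst() it becomes
 * func(src * ZERO, dst * SRC_COLOR), which still computes src * dst but no
 * longer reads DST in the source factor -- the form the RB+ blend
 * optimizations below can encode.
 */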
/* Look up the ideal settings from tables. */
srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);
dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
/* Set the final value. */
- sx_mrt_blend_opt[i] =
- S_028760_COLOR_SRC_OPT(srcRGB_opt) | S_028760_COLOR_DST_OPT(dstRGB_opt) |
- S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
- S_028760_ALPHA_SRC_OPT(srcA_opt) | S_028760_ALPHA_DST_OPT(dstA_opt) |
- S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
+ sx_mrt_blend_opt[i] = S_028760_COLOR_SRC_OPT(srcRGB_opt) | S_028760_COLOR_DST_OPT(dstRGB_opt) |
+ S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
+ S_028760_ALPHA_SRC_OPT(srcA_opt) | S_028760_ALPHA_DST_OPT(dstA_opt) |
+ S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
blend_cntl |= S_028780_ENABLE(1);
blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
/* Disable RB+ blend optimizations for dual source blending. */
if (mrt0_is_dual_src) {
for (unsigned i = 0; i < MAX_RTS; i++) {
- sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
- S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
+ sx_mrt_blend_opt[i] =
+ S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
}
}
/* Disable RB+ blend optimizations on GFX11 when alpha-to-coverage is enabled. */
if (gfx_level >= GFX11 && d->vk.ms.alpha_to_coverage_enable) {
- sx_mrt_blend_opt[0] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
- S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
+ sx_mrt_blend_opt[0] =
+ S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
}
}
radv_normalize_blend_factor(eqRGB, &srcRGB, &dstRGB);
if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA ||
- srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
- dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
- srcRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA ||
- dstRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA)
+ srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
+ srcRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA || dstRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA)
state.need_src_alpha |= 1 << i;
}
uint32_t hash = radv_hash_ps_epilog(&key);
u_rwlock_rdlock(&device->ps_epilogs_lock);
- struct hash_entry *epilog_entry =
- _mesa_hash_table_search_pre_hashed(device->ps_epilogs, hash, &key);
+ struct hash_entry *epilog_entry = _mesa_hash_table_search_pre_hashed(device->ps_epilogs, hash, &key);
u_rwlock_rdunlock(&device->ps_epilogs_lock);
if (!epilog_entry) {
unsigned ps_iter_samples = radv_get_ps_iter_samples(cmd_buffer);
unsigned log_z_samples = util_logbase2(z_samples);
unsigned log_ps_iter_samples = util_logbase2(ps_iter_samples);
- bool uses_underestimate =
- d->vk.rs.conservative_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT;
+ bool uses_underestimate = d->vk.rs.conservative_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT;
- db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) |
- S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
- S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
- S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples);
+ db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) | S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
+ S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) | S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples);
pa_sc_aa_config |= S_028BE0_MSAA_NUM_SAMPLES(uses_underestimate ? 0 : log_samples) |
- S_028BE0_MAX_SAMPLE_DIST(max_sample_dist) |
- S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) |
+ S_028BE0_MAX_SAMPLE_DIST(max_sample_dist) | S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) |
S_028BE0_COVERED_CENTROID_IS_CENTER(pdevice->rad_info.gfx_level >= GFX10_3);
if (d->vk.rs.line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT)
if (pdevice->rad_info.gfx_level == GFX11 && render->ds_att.format == VK_FORMAT_UNDEFINED) {
assert(!render->ds_att.iview);
radeon_set_context_reg(cmd_buffer->cs, R_028040_DB_Z_INFO,
- S_028040_FORMAT(V_028040_Z_INVALID) |
- S_028040_NUM_SAMPLES(log_samples));
+ S_028040_FORMAT(V_028040_Z_INVALID) | S_028040_NUM_SAMPLES(log_samples));
}
radeon_set_context_reg(cmd_buffer->cs, R_028804_DB_EQAA, db_eqaa);
radeon_set_context_reg(cmd_buffer->cs, R_028BE0_PA_SC_AA_CONFIG, pa_sc_aa_config);
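/* Worked log2 values for the fields above, assuming 8x MSAA with 2x sample
 * shading: z_samples = 8 -> log_z_samples = 3, ps_iter_samples = 2 ->
 * log_ps_iter_samples = 1, and log_samples = 3 (all via util_logbase2()).
 */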
- radeon_set_context_reg(cmd_buffer->cs, R_028A48_PA_SC_MODE_CNTL_0,
- S_028A48_ALTERNATE_RBS_PER_TILE(pdevice->rad_info.gfx_level >= GFX9) |
- S_028A48_VPORT_SCISSOR_ENABLE(1) |
- S_028A48_LINE_STIPPLE_ENABLE(d->vk.rs.line.stipple.enable) |
- S_028A48_MSAA_ENABLE(rasterization_samples > 1));
+ radeon_set_context_reg(
+ cmd_buffer->cs, R_028A48_PA_SC_MODE_CNTL_0,
+ S_028A48_ALTERNATE_RBS_PER_TILE(pdevice->rad_info.gfx_level >= GFX9) | S_028A48_VPORT_SCISSOR_ENABLE(1) |
+ S_028A48_LINE_STIPPLE_ENABLE(d->vk.rs.line.stipple.enable) | S_028A48_MSAA_ENABLE(rasterization_samples > 1));
}
static void
/* The DX10 diamond test is unnecessary with Vulkan, and it decreases line rasterization
* performance.
*/
- radeon_set_context_reg(cmd_buffer->cs, R_028BDC_PA_SC_LINE_CNTL,
- S_028BDC_PERPENDICULAR_ENDCAP_ENA(
- d->vk.rs.line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT));
+ radeon_set_context_reg(
+ cmd_buffer->cs, R_028BDC_PA_SC_LINE_CNTL,
+ S_028BDC_PERPENDICULAR_ENDCAP_ENA(d->vk.rs.line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT));
}
static bool
static void
radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const uint64_t states)
{
- if (states & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLAMP_ENABLE))
+ if (states & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT | RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_ENABLE |
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLAMP_ENABLE))
radv_emit_viewport(cmd_buffer);
if (states & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT) &&
if (states & RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
radv_emit_blend_constants(cmd_buffer);
- if (states &
- (RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK))
+ if (states & (RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
+ RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK))
radv_emit_stencil(cmd_buffer);
if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS)
if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)
radv_emit_depth_bias(cmd_buffer);
- if (states & (RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE |
- RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE_ENABLE |
+ if (states & (RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE | RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE_ENABLE |
RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE_MODE))
radv_emit_discard_rectangle(cmd_buffer);
if (states & (RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE |
RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE | RADV_CMD_DIRTY_DYNAMIC_POLYGON_MODE |
- RADV_CMD_DIRTY_DYNAMIC_PROVOKING_VERTEX_MODE |
- RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE))
+ RADV_CMD_DIRTY_DYNAMIC_PROVOKING_VERTEX_MODE | RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE))
radv_emit_culling(cmd_buffer);
- if (states & (RADV_CMD_DIRTY_DYNAMIC_PROVOKING_VERTEX_MODE |
- RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY))
+ if (states & (RADV_CMD_DIRTY_DYNAMIC_PROVOKING_VERTEX_MODE | RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY))
radv_emit_provoking_vertex_mode(cmd_buffer);
if (states & RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY)
radv_emit_primitive_topology(cmd_buffer);
- if (states &
- (RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP))
+ if (states & (RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |
+ RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP))
radv_emit_depth_control(cmd_buffer);
if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP)
if (states & RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE)
radv_emit_primitive_restart_enable(cmd_buffer);
- if (states & (RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLAMP_ENABLE))
+ if (states & (RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_ENABLE |
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLAMP_ENABLE))
radv_emit_clipping(cmd_buffer);
if (states & (RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP | RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK |
- RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE |
+ RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK | RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE |
RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_EQUATION))
radv_emit_logic_op(cmd_buffer);
- if (states & (RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK))
+ if (states & (RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE | RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK))
radv_emit_color_write(cmd_buffer);
if (states & RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT)
if (states & RADV_CMD_DIRTY_DYNAMIC_SAMPLE_MASK)
radv_emit_sample_mask(cmd_buffer);
- if (states & (RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLAMP_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_ENABLE))
+ if (states & (RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLAMP_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_ENABLE))
radv_emit_depth_clamp_enable(cmd_buffer);
- if (states & (RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK |
- RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_EQUATION |
- RADV_CMD_DIRTY_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE))
+ if (states & (RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE | RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK |
+ RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_EQUATION | RADV_CMD_DIRTY_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE))
radv_emit_color_blend(cmd_buffer);
if (states & RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE)
radv_emit_line_rasterization_mode(cmd_buffer);
- if (states & (RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES |
- RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE))
+ if (states & (RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES | RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE))
radv_emit_rasterization_samples(cmd_buffer);
- if (states & (RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_CONSERVATIVE_RAST_MODE |
- RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS |
- RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES |
+ if (states & (RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE_ENABLE | RADV_CMD_DIRTY_DYNAMIC_CONSERVATIVE_RAST_MODE |
+ RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS | RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES |
RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE))
radv_emit_msaa_state(cmd_buffer);
- if (states & (RADV_CMD_DIRTY_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE))
+ if (states &
+ (RADV_CMD_DIRTY_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE | RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE))
radv_emit_attachment_feedback_loop_enable(cmd_buffer);
cmd_buffer->state.dirty &= ~states;
}
static void
-radv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct radv_descriptor_state *descriptors_state)
+radv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_descriptor_state *descriptors_state)
{
struct radv_descriptor_set *set = (struct radv_descriptor_set *)&descriptors_state->push_set.set;
unsigned bo_offset;
- if (!radv_cmd_buffer_upload_data(cmd_buffer, set->header.size, set->header.mapped_ptr,
- &bo_offset))
+ if (!radv_cmd_buffer_upload_data(cmd_buffer, set->header.size, set->header.mapped_ptr, &bo_offset))
return;
set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
}
static void
-radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint bind_point)
+radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
{
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
+ struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point);
uint32_t size = MAX_SETS * 4;
uint32_t offset;
void *ptr;
uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
va += offset;
- ASSERTED unsigned cdw_max =
- radeon_check_space(device->ws, cs, MESA_VULKAN_SHADER_STAGES * 3);
+ ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, MESA_VULKAN_SHADER_STAGES * 3);
if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
for (unsigned s = MESA_SHADER_VERTEX; s <= MESA_SHADER_FRAGMENT; s++)
if (radv_cmdbuf_has_stage(cmd_buffer, s))
radv_emit_userdata_address(device, cs, cmd_buffer->state.shaders[s],
- cmd_buffer->state.shaders[s]->info.user_data_0,
- AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
+ cmd_buffer->state.shaders[s]->info.user_data_0, AC_UD_INDIRECT_DESCRIPTOR_SETS,
+ va);
if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_MESH))
radv_emit_userdata_address(device, cs, cmd_buffer->state.shaders[MESA_SHADER_MESH],
if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
radeon_check_space(device->ws, cmd_buffer->gang.cs, 3);
- radv_emit_userdata_address(device, cmd_buffer->gang.cs,
- cmd_buffer->state.shaders[MESA_SHADER_TASK],
+ radv_emit_userdata_address(device, cmd_buffer->gang.cs, cmd_buffer->state.shaders[MESA_SHADER_TASK],
cmd_buffer->state.shaders[MESA_SHADER_TASK]->info.user_data_0,
AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
}
}
ALWAYS_INLINE static void
-radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages,
- VkPipelineBindPoint bind_point)
+radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages, VkPipelineBindPoint bind_point)
{
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
+ struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point);
struct radv_device *device = cmd_buffer->device;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
bool flush_indirect_descriptors;
if (flush_indirect_descriptors)
radv_flush_indirect_descriptor_sets(cmd_buffer, bind_point);
- ASSERTED unsigned cdw_max =
- radeon_check_space(device->ws, cs, MAX_SETS * MESA_VULKAN_SHADER_STAGES * 4);
+ ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, MAX_SETS * MESA_VULKAN_SHADER_STAGES * 4);
if (stages & VK_SHADER_STAGE_COMPUTE_BIT) {
struct radv_shader *compute_shader = bind_point == VK_PIPELINE_BIND_POINT_COMPUTE
? cmd_buffer->state.shaders[MESA_SHADER_COMPUTE]
: cmd_buffer->state.rt_prolog;
- radv_emit_descriptor_pointers(device, cs, compute_shader, compute_shader->info.user_data_0,
- descriptors_state);
+ radv_emit_descriptor_pointers(device, cs, compute_shader, compute_shader->info.user_data_0, descriptors_state);
} else {
radv_foreach_stage(stage, stages & ~VK_SHADER_STAGE_TASK_BIT_EXT)
{
continue;
radv_emit_descriptor_pointers(device, cs, cmd_buffer->state.shaders[stage],
- cmd_buffer->state.shaders[stage]->info.user_data_0,
- descriptors_state);
+ cmd_buffer->state.shaders[stage]->info.user_data_0, descriptors_state);
}
if (stages & VK_SHADER_STAGE_TASK_BIT_EXT) {
- radv_emit_descriptor_pointers(device, cmd_buffer->gang.cs,
- cmd_buffer->state.shaders[MESA_SHADER_TASK],
+ radv_emit_descriptor_pointers(device, cmd_buffer->gang.cs, cmd_buffer->state.shaders[MESA_SHADER_TASK],
cmd_buffer->state.shaders[MESA_SHADER_TASK]->info.user_data_0,
descriptors_state);
}
}
static void
-radv_emit_all_inline_push_consts(struct radv_device *device, struct radeon_cmdbuf *cs,
- struct radv_shader *shader, uint32_t base_reg,
- uint32_t *values, bool *need_push_constants)
+radv_emit_all_inline_push_consts(struct radv_device *device, struct radeon_cmdbuf *cs, struct radv_shader *shader,
+ uint32_t base_reg, uint32_t *values, bool *need_push_constants)
{
if (radv_get_user_sgpr(shader, AC_UD_PUSH_CONSTANTS)->sgpr_idx != -1)
*need_push_constants |= true;
const uint8_t base = ffs(mask) - 1;
if (mask == u_bit_consecutive64(base, util_last_bit64(mask) - base)) {
/* consecutive inline push constants */
- radv_emit_inline_push_consts(device, cs, shader, base_reg, AC_UD_INLINE_PUSH_CONSTANTS,
- values + base);
+ radv_emit_inline_push_consts(device, cs, shader, base_reg, AC_UD_INLINE_PUSH_CONSTANTS, values + base);
} else {
/* sparse inline push constants */
uint32_t consts[AC_MAX_INLINE_PUSH_CONSTS];
unsigned num_consts = 0;
u_foreach_bit64 (idx, mask)
consts[num_consts++] = values[idx];
- radv_emit_inline_push_consts(device, cs, shader, base_reg, AC_UD_INLINE_PUSH_CONSTANTS,
- consts);
+ radv_emit_inline_push_consts(device, cs, shader, base_reg, AC_UD_INLINE_PUSH_CONSTANTS, consts);
}
}
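/* Editor's sketch: the consecutive/sparse split above reduces to one test --
 * is the push-constant mask a single dense run of bits? Standalone model of
 * the u_bit_consecutive64/util_last_bit64 check, assuming only GCC/Clang
 * builtins; e.g. 0b0111000 takes the direct path, 0b0101000 gets gathered
 * into consts[] first. */
#include <stdbool.h>
#include <stdint.h>

static bool mask_is_one_run(uint64_t mask)
{
   if (!mask)
      return false;
   const unsigned base = __builtin_ctzll(mask);            /* ffs(mask) - 1   */
   const unsigned len = 64 - __builtin_clzll(mask) - base; /* last_bit - base */
   const uint64_t run = (len == 64) ? ~0ull : (((1ull << len) - 1) << base);
   return mask == run;
}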
radv_must_flush_constants(const struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages,
VkPipelineBindPoint bind_point)
{
- const struct radv_push_constant_state *push_constants =
- radv_get_push_constants_state(cmd_buffer, bind_point);
+ const struct radv_push_constant_state *push_constants = radv_get_push_constants_state(cmd_buffer, bind_point);
if (push_constants->size || push_constants->dynamic_offset_count)
return stages & cmd_buffer->push_constant_stages;
}
static void
-radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages,
- VkPipelineBindPoint bind_point)
+radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages, VkPipelineBindPoint bind_point)
{
struct radv_device *device = cmd_buffer->device;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
- const struct radv_push_constant_state *push_constants =
- radv_get_push_constants_state(cmd_buffer, bind_point);
+ struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point);
+ const struct radv_push_constant_state *push_constants = radv_get_push_constants_state(cmd_buffer, bind_point);
struct radv_shader *shader, *prev_shader;
bool need_push_constants = false;
unsigned offset;
radv_emit_all_inline_push_consts(device, cs, compute_shader, compute_shader->info.user_data_0,
(uint32_t *)cmd_buffer->push_constants, &need_push_constants);
} else {
- radv_foreach_stage(stage, internal_stages & ~VK_SHADER_STAGE_TASK_BIT_EXT) {
+ radv_foreach_stage(stage, internal_stages & ~VK_SHADER_STAGE_TASK_BIT_EXT)
+ {
shader = radv_get_shader(cmd_buffer->state.shaders, stage);
if (!shader)
continue;
radv_emit_all_inline_push_consts(device, cs, shader, shader->info.user_data_0,
- (uint32_t *)cmd_buffer->push_constants,
- &need_push_constants);
+ (uint32_t *)cmd_buffer->push_constants, &need_push_constants);
}
if (internal_stages & VK_SHADER_STAGE_TASK_BIT_EXT) {
- radv_emit_all_inline_push_consts(device, cmd_buffer->gang.cs,
- cmd_buffer->state.shaders[MESA_SHADER_TASK],
+ radv_emit_all_inline_push_consts(device, cmd_buffer->gang.cs, cmd_buffer->state.shaders[MESA_SHADER_TASK],
cmd_buffer->state.shaders[MESA_SHADER_TASK]->info.user_data_0,
- (uint32_t *)cmd_buffer->push_constants,
- &need_push_constants);
+ (uint32_t *)cmd_buffer->push_constants, &need_push_constants);
}
}
if (need_push_constants) {
- if (!radv_cmd_buffer_upload_alloc(
- cmd_buffer, push_constants->size + 16 * push_constants->dynamic_offset_count, &offset,
- &ptr))
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer, push_constants->size + 16 * push_constants->dynamic_offset_count,
+ &offset, &ptr))
return;
memcpy(ptr, cmd_buffer->push_constants, push_constants->size);
? cmd_buffer->state.shaders[MESA_SHADER_COMPUTE]
: cmd_buffer->state.rt_prolog;
- radv_emit_userdata_address(device, cs, compute_shader, compute_shader->info.user_data_0,
- AC_UD_PUSH_CONSTANTS, va);
+ radv_emit_userdata_address(device, cs, compute_shader, compute_shader->info.user_data_0, AC_UD_PUSH_CONSTANTS,
+ va);
} else {
prev_shader = NULL;
radv_foreach_stage(stage, internal_stages & ~VK_SHADER_STAGE_TASK_BIT_EXT)
/* Avoid redundantly emitting the address for merged stages. */
if (shader && shader != prev_shader) {
- radv_emit_userdata_address(device, cs, shader, shader->info.user_data_0,
- AC_UD_PUSH_CONSTANTS, va);
+ radv_emit_userdata_address(device, cs, shader, shader->info.user_data_0, AC_UD_PUSH_CONSTANTS, va);
prev_shader = shader;
}
}
if (internal_stages & VK_SHADER_STAGE_TASK_BIT_EXT) {
- radv_emit_userdata_address(device, cmd_buffer->gang.cs,
- cmd_buffer->state.shaders[MESA_SHADER_TASK],
+ radv_emit_userdata_address(device, cmd_buffer->gang.cs, cmd_buffer->state.shaders[MESA_SHADER_TASK],
cmd_buffer->state.shaders[MESA_SHADER_TASK]->info.user_data_0,
AC_UD_PUSH_CONSTANTS, va);
}
}
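/* Editor's sketch of the upload performed above: one allocation holds the raw
 * push-constant bytes followed by 16 bytes per dynamic offset (the layout is
 * an assumption read off the size expression pc_size + 16 * dyn_count). */
#include <stdint.h>
#include <string.h>

static uint32_t fill_pc_upload(uint8_t *dst, const uint8_t *push_consts,
                               uint32_t pc_size, uint32_t dyn_count)
{
   memcpy(dst, push_consts, pc_size);         /* constants first              */
   memset(dst + pc_size, 0, 16u * dyn_count); /* dynamic-offset entries after */
   return pc_size + 16u * dyn_count;          /* bytes consumed               */
}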
void
-radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer,
- const struct radv_graphics_pipeline *pipeline,
+radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const struct radv_graphics_pipeline *pipeline,
bool full_null_descriptors, void *vb_ptr)
{
struct radv_shader *vs_shader = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_VERTEX);
vs_shader->info.vs.dynamic_inputs ? &cmd_buffer->state.dynamic_vs_input : NULL;
assert(!vs_state || vs_shader->info.vs.use_per_attribute_vb_descs);
- const struct ac_vtx_format_info *vtx_info_table =
- vs_state ? ac_get_vtx_format_info_table(chip, family) : NULL;
+ const struct ac_vtx_format_info *vtx_info_table = vs_state ? ac_get_vtx_format_info_table(chip, family) : NULL;
while (mask) {
unsigned i = u_bit_scan(&mask);
continue;
}
- unsigned binding =
- vs_state ? cmd_buffer->state.dynamic_vs_input.bindings[i]
- : (vs_shader->info.vs.use_per_attribute_vb_descs ? pipeline->attrib_bindings[i] : i);
+ unsigned binding = vs_state ? cmd_buffer->state.dynamic_vs_input.bindings[i]
+ : (vs_shader->info.vs.use_per_attribute_vb_descs ? pipeline->attrib_bindings[i] : i);
struct radv_buffer *buffer = cmd_buffer->vertex_binding_buffers[binding];
unsigned num_records;
unsigned stride;
if (chip >= GFX10) {
rsrc_word3 = vtx_info->dst_sel | S_008F0C_FORMAT(hw_format);
} else {
- rsrc_word3 = vtx_info->dst_sel | S_008F0C_NUM_FORMAT((hw_format >> 4) & 0x7) |
- S_008F0C_DATA_FORMAT(hw_format & 0xf);
+ rsrc_word3 =
+ vtx_info->dst_sel | S_008F0C_NUM_FORMAT((hw_format >> 4) & 0x7) | S_008F0C_DATA_FORMAT(hw_format & 0xf);
}
} else {
- rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (chip >= GFX10)
rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT);
else
- rsrc_word3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ rsrc_word3 |=
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
}
- if (pipeline->dynamic_states & (RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE |
- RADV_DYNAMIC_VERTEX_INPUT)) {
+ if (pipeline->dynamic_states & (RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE | RADV_DYNAMIC_VERTEX_INPUT)) {
stride = cmd_buffer->vertex_bindings[binding].stride;
} else {
stride = pipeline->binding_stride[binding];
}
if (vs_shader->info.vs.use_per_attribute_vb_descs) {
- uint32_t attrib_end =
- vs_state ? vs_state->offsets[i] + vs_state->format_sizes[i] : pipeline->attrib_ends[i];
+ uint32_t attrib_end = vs_state ? vs_state->offsets[i] + vs_state->format_sizes[i] : pipeline->attrib_ends[i];
if (num_records < attrib_end) {
num_records = 0; /* not enough space for one vertex */
va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
va += vb_offset;
- radv_emit_userdata_address(cmd_buffer->device, cmd_buffer->cs, vs, vs->info.user_data_0,
- AC_UD_VS_VERTEX_BUFFERS, va);
+ radv_emit_userdata_address(cmd_buffer->device, cmd_buffer->cs, vs, vs->info.user_data_0, AC_UD_VS_VERTEX_BUFFERS,
+ va);
cmd_buffer->state.vb_va = va;
cmd_buffer->state.vb_size = vb_desc_alloc_size;
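/* Editor's sketch of the per-attribute bound logic above: with
 * use_per_attribute_vb_descs the record count must cover the whole attribute
 * (attrib_end = attribute offset + format size), so a buffer too small for
 * even one vertex clamps to zero records. Minimal standalone model: */
#include <stdint.h>

static uint32_t vb_num_records(uint32_t buffer_bytes, uint32_t stride,
                               uint32_t attrib_end)
{
   if (buffer_bytes < attrib_end)
      return 0; /* not enough space for one vertex */
   if (stride == 0)
      return 1; /* every fetch reads vertex 0      */
   return (buffer_bytes - attrib_end) / stride + 1; /* last vertex must fit */
}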
radv_emit_streamout_buffers(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
{
const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader;
- const struct radv_userdata_info *loc =
- radv_get_user_sgpr(last_vgt_shader, AC_UD_STREAMOUT_BUFFERS);
+ const struct radv_userdata_info *loc = radv_get_user_sgpr(last_vgt_shader, AC_UD_STREAMOUT_BUFFERS);
uint32_t base_reg;
if (loc->sgpr_idx == -1)
base_reg = last_vgt_shader->info.user_data_0;
- radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va,
- false);
+ radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va, false);
if (cmd_buffer->state.gs_copy_shader) {
loc = &cmd_buffer->state.gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_STREAMOUT_BUFFERS];
if (loc->sgpr_idx != -1) {
base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0;
- radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4,
- va, false);
+ radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va, false);
}
}
}
}
}
- uint32_t rsrc_word3 =
- S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+ uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
- rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
+ rsrc_word3 |=
+ S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
} else if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) {
rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
radv_flush_ngg_query_state(struct radv_cmd_buffer *cmd_buffer)
{
const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader;
- const struct radv_userdata_info *loc =
- radv_get_user_sgpr(last_vgt_shader, AC_UD_NGG_QUERY_STATE);
+ const struct radv_userdata_info *loc = radv_get_user_sgpr(last_vgt_shader, AC_UD_NGG_QUERY_STATE);
enum radv_ngg_query_state ngg_query_state = radv_ngg_query_none;
uint32_t base_reg;
* primitives.
*/
if (cmd_buffer->state.active_pipeline_gds_queries ||
- (cmd_buffer->state.inherited_pipeline_statistics &
- VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT))
+ (cmd_buffer->state.inherited_pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT))
ngg_query_state |= radv_ngg_query_pipeline_stat;
if (cmd_buffer->state.active_prims_gen_gds_queries)
break;
}
- if (cmd_buffer->state.last_vrs_rates != vrs_rates ||
- cmd_buffer->state.last_vrs_rates_sgpr_idx != loc->sgpr_idx) {
+ if (cmd_buffer->state.last_vrs_rates != vrs_rates || cmd_buffer->state.last_vrs_rates_sgpr_idx != loc->sgpr_idx) {
radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, vrs_rates);
}
VkShaderStageFlags stages = VK_SHADER_STAGE_ALL_GRAPHICS;
radv_flush_descriptors(cmd_buffer, stages, VK_PIPELINE_BIND_POINT_GRAPHICS);
- const VkShaderStageFlags pc_stages =
- radv_must_flush_constants(cmd_buffer, stages, VK_PIPELINE_BIND_POINT_GRAPHICS);
+ const VkShaderStageFlags pc_stages = radv_must_flush_constants(cmd_buffer, stages, VK_PIPELINE_BIND_POINT_GRAPHICS);
if (pc_stages)
radv_flush_constants(cmd_buffer, pc_stages, VK_PIPELINE_BIND_POINT_GRAPHICS);
}
static void
-si_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw,
- bool indirect_draw, bool count_from_stream_output,
- uint32_t draw_vertex_count)
+si_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw, bool indirect_draw,
+ bool count_from_stream_output, uint32_t draw_vertex_count)
{
const struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
struct radv_cmd_state *state = &cmd_buffer->state;
unsigned ia_multi_vgt_param;
ia_multi_vgt_param =
- si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, indirect_draw, count_from_stream_output,
- draw_vertex_count, topology, prim_restart_enable,
- patch_control_points, state->tess_num_patches);
+ si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, indirect_draw, count_from_stream_output, draw_vertex_count,
+ topology, prim_restart_enable, patch_control_points, state->tess_num_patches);
if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) {
if (info->gfx_level == GFX9) {
- radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs,
- R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param);
+ radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs, R_030960_IA_MULTI_VGT_PARAM, 4,
+ ia_multi_vgt_param);
} else if (info->gfx_level >= GFX7) {
radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
} else {
break_wave_at_eoi = true;
}
} else if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_GEOMETRY)) {
- const struct radv_legacy_gs_info *gs_state =
- &cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.gs_ring_info;
+ const struct radv_legacy_gs_info *gs_state = &cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.gs_ring_info;
primgroup_size = G_028A44_GS_PRIMS_PER_SUBGRP(gs_state->vgt_gs_onchip_cntl);
} else {
primgroup_size = 128; /* recommended without a GS and tess */
}
- ge_cntl = S_03096C_PRIM_GRP_SIZE_GFX10(primgroup_size) |
- S_03096C_VERT_GRP_SIZE(256) | /* disable vertex grouping */
+ ge_cntl = S_03096C_PRIM_GRP_SIZE_GFX10(primgroup_size) | S_03096C_VERT_GRP_SIZE(256) | /* disable vertex grouping */
S_03096C_PACKET_TO_ONE_PA(0) /* this should only be set if LINE_STIPPLE_TEX_ENA == 1 */ |
S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi);
gfx10_emit_ge_cntl(cmd_buffer);
} else {
si_emit_ia_multi_vgt_param(cmd_buffer, draw_info->instance_count > 1, draw_info->indirect,
- !!draw_info->strmout_buffer,
- draw_info->indirect ? 0 : draw_info->count);
+ !!draw_info->strmout_buffer, draw_info->indirect ? 0 : draw_info->count);
}
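/* Editor's sketch: IA_MULTI_VGT_PARAM is shadowed in radv_cmd_state
 * (state->last_ia_multi_vgt_param) and only re-emitted when the computed
 * value changes -- the generic shadowed-register pattern used here: */
#include <stdbool.h>
#include <stdint.h>

static bool shadow_reg_update(uint32_t *shadow, uint32_t value)
{
   if (*shadow == value)
      return false; /* skip the SET_*_REG packet         */
   *shadow = value;
   return true;     /* caller emits the register write   */
}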
/* RDNA2 is affected by a hardware bug when instance packing is enabled for adjacent primitive
disable_instance_packing = true;
}
- if ((draw_info->indexed &&
- (state->index_type != state->last_index_type || cmd_buffer->device->uses_shadow_regs)) ||
+ if ((draw_info->indexed && (state->index_type != state->last_index_type || cmd_buffer->device->uses_shadow_regs)) ||
(info->gfx_level == GFX10_3 &&
(state->last_index_type == -1 ||
disable_instance_packing != G_028A7C_DISABLE_INSTANCE_PACKING(state->last_index_type)))) {
uint32_t index_type = state->index_type | S_028A7C_DISABLE_INSTANCE_PACKING(disable_instance_packing);
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) {
- radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs,
- R_03090C_VGT_INDEX_TYPE, 2, index_type);
+ radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs, R_03090C_VGT_INDEX_TYPE, 2, index_type);
} else {
radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
radeon_emit(cs, index_type);
if (src_stage_mask & VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT)
src_stage_mask |= VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT;
- if (src_stage_mask & (VK_PIPELINE_STAGE_2_COPY_BIT |
- VK_PIPELINE_STAGE_2_RESOLVE_BIT |
- VK_PIPELINE_STAGE_2_BLIT_BIT |
+ if (src_stage_mask & (VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_RESOLVE_BIT | VK_PIPELINE_STAGE_2_BLIT_BIT |
VK_PIPELINE_STAGE_2_CLEAR_BIT)) {
/* Be conservative for now. */
src_stage_mask |= VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT;
if (src_stage_mask &
(VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT |
VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR |
- VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR |
- VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT |
- VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) {
+ VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR | VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR |
+ VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) {
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
}
- if (src_stage_mask &
- (VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT |
- VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT |
- VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT |
- VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) {
+ if (src_stage_mask & (VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT |
+ VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT |
+ VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT |
+ VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) {
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
} else if (src_stage_mask &
(VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT |
- VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT |
- VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT |
- VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT |
- VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT |
- VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT |
- VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT |
+ VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT |
+ VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT |
+ VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT | VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT |
VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT)) {
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH;
}
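/* Editor's sketch of the cascade above: source stages map to the cheapest
 * sufficient wait. The CS wait accumulates independently of the graphics
 * side, while PS vs. VS is an either/or (a PS wait covers pre-raster work
 * implicitly). Condensed model with stand-in flag values: */
#include <stdbool.h>
#include <stdint.h>

enum { FLUSH_CS = 1u << 0, FLUSH_PS = 1u << 1, FLUSH_VS = 1u << 2 };

static uint32_t partial_flushes(bool compute_like, bool touches_pixel_backend,
                                bool pre_raster_only)
{
   uint32_t f = 0;
   if (compute_like)
      f |= FLUSH_CS;      /* compute, transfers, RT, AS builds     */
   if (touches_pixel_backend)
      f |= FLUSH_PS;      /* FS, depth tests, CB output, transfers */
   else if (pre_raster_only)
      f |= FLUSH_VS;      /* geometry-side stages only             */
   return f;
}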
*/
enum radv_cmd_flush_bits
-radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 src_flags,
- const struct radv_image *image)
+radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 src_flags, const struct radv_image *image)
{
bool has_CB_meta = true, has_DB_meta = true;
bool image_is_coherent = image ? image->l2_coherent : false;
has_DB_meta = false;
}
- u_foreach_bit64(b, src_flags)
- {
+ u_foreach_bit64 (b, src_flags) {
switch ((VkAccessFlags2)BITFIELD64_BIT(b)) {
case VK_ACCESS_2_SHADER_WRITE_BIT:
case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT:
}
enum radv_cmd_flush_bits
-radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 dst_flags,
- const struct radv_image *image)
+radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 dst_flags, const struct radv_image *image)
{
bool has_CB_meta = true, has_DB_meta = true;
enum radv_cmd_flush_bits flush_bits = 0;
/* All the L2 invalidations below are not the CB/DB. So if there are no incoherent images
* in the L2 cache in CB/DB mode then they are already usable from all the other L2 clients. */
- image_is_coherent |=
- can_skip_buffer_l2_flushes(cmd_buffer->device) && !cmd_buffer->state.rb_noncoherent_dirty;
+ image_is_coherent |= can_skip_buffer_l2_flushes(cmd_buffer->device) && !cmd_buffer->state.rb_noncoherent_dirty;
- u_foreach_bit64(b, dst_flags)
- {
+ u_foreach_bit64 (b, dst_flags) {
switch ((VkAccessFlags2)BITFIELD64_BIT(b)) {
case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT:
/* SMEM loads are used to read compute dispatch size in shaders */
}
void
-radv_emit_resolve_barrier(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_resolve_barrier *barrier)
+radv_emit_resolve_barrier(struct radv_cmd_buffer *cmd_buffer, const struct radv_resolve_barrier *barrier)
{
struct radv_rendering_state *render = &cmd_buffer->state.render;
if (!iview)
continue;
- cmd_buffer->state.flush_bits |=
- radv_src_access_flush(cmd_buffer, barrier->src_access_mask, iview->image);
+ cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, barrier->src_access_mask, iview->image);
}
if (render->ds_att.iview) {
cmd_buffer->state.flush_bits |=
if (!iview)
continue;
- cmd_buffer->state.flush_bits |=
- radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask, iview->image);
+ cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask, iview->image);
}
if (render->ds_att.iview) {
cmd_buffer->state.flush_bits |=
static void
radv_handle_image_transition_separate(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
VkImageLayout src_layout, VkImageLayout dst_layout,
- VkImageLayout src_stencil_layout,
- VkImageLayout dst_stencil_layout,
+ VkImageLayout src_stencil_layout, VkImageLayout dst_stencil_layout,
uint32_t src_family_index, uint32_t dst_family_index,
const VkImageSubresourceRange *range,
struct radv_sample_locations_state *sample_locs)
/* Depth-only transitions. */
if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
aspect_range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
- radv_handle_image_transition(cmd_buffer, image, src_layout, dst_layout,
- src_family_index, dst_family_index, &aspect_range, sample_locs);
+ radv_handle_image_transition(cmd_buffer, image, src_layout, dst_layout, src_family_index, dst_family_index,
+ &aspect_range, sample_locs);
}
/* Stencil-only transitions. */
aspect_range.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
- radv_handle_image_transition(cmd_buffer, image, src_stencil_layout, dst_stencil_layout,
- src_family_index, dst_family_index, &aspect_range, sample_locs);
+ radv_handle_image_transition(cmd_buffer, image, src_stencil_layout, dst_stencil_layout, src_family_index,
+ dst_family_index, &aspect_range, sample_locs);
} else {
- radv_handle_image_transition(cmd_buffer, image, src_layout, dst_layout,
- src_family_index, dst_family_index, range, sample_locs);
+ radv_handle_image_transition(cmd_buffer, image, src_layout, dst_layout, src_family_index, dst_family_index, range,
+ sample_locs);
}
}
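/* Editor's sketch of the split above: when an image may hold depth and
 * stencil in different layouts, the subresource range is transitioned one
 * aspect at a time, each with its own source/destination layout pair: */
#include <stdbool.h>

typedef void (*aspect_transition_fn)(int aspect, int src_layout, int dst_layout);

static void transition_depth_stencil(bool has_depth, bool has_stencil,
                                     int src_d, int dst_d, int src_s, int dst_s,
                                     aspect_transition_fn cb)
{
   if (has_depth)
      cb(1 /* depth */, src_d, dst_d);
   if (has_stencil)
      cb(2 /* stencil */, src_s, dst_s);
}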
static void
-radv_handle_rendering_image_transition(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *view,
- uint32_t layer_count,
- uint32_t view_mask,
- VkImageLayout initial_layout,
- VkImageLayout initial_stencil_layout,
- VkImageLayout final_layout,
+radv_handle_rendering_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *view,
+ uint32_t layer_count, uint32_t view_mask, VkImageLayout initial_layout,
+ VkImageLayout initial_stencil_layout, VkImageLayout final_layout,
VkImageLayout final_stencil_layout,
struct radv_sample_locations_state *sample_locs)
{
range.layerCount = count;
radv_handle_image_transition_separate(cmd_buffer, view->image, initial_layout, final_layout,
- initial_stencil_layout, final_stencil_layout,
- 0, 0, &range, sample_locs);
+ initial_stencil_layout, final_stencil_layout, 0, 0, &range, sample_locs);
}
} else {
range.baseArrayLayer = view->vk.base_array_layer;
range.layerCount = layer_count;
radv_handle_image_transition_separate(cmd_buffer, view->image, initial_layout, final_layout,
- initial_stencil_layout, final_stencil_layout,
- 0, 0, &range, sample_locs);
+ initial_stencil_layout, final_stencil_layout, 0, 0, &range, sample_locs);
}
}
cmd_buffer->state.last_db_count_control = -1;
cmd_buffer->usage_flags = pBeginInfo->flags;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_ALL | RADV_CMD_DIRTY_GUARDBAND |
- RADV_CMD_DIRTY_OCCLUSION_QUERY;
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_ALL | RADV_CMD_DIRTY_GUARDBAND | RADV_CMD_DIRTY_OCCLUSION_QUERY;
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) {
uint32_t pred_value = 0;
cmd_buffer->mec_inv_pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
}
- if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9 &&
- cmd_buffer->qf == RADV_QUEUE_GENERAL) {
+ if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9 && cmd_buffer->qf == RADV_QUEUE_GENERAL) {
unsigned num_db = cmd_buffer->device->physical_device->rad_info.max_render_backends;
unsigned fence_offset, eop_bug_offset;
void *fence_ptr;
char gcbiar_data[VK_GCBIARR_DATA_SIZE(MAX_RTS)];
const VkRenderingInfo *resume_info =
- vk_get_command_buffer_inheritance_as_rendering_resume(cmd_buffer->vk.level, pBeginInfo,
- gcbiar_data);
+ vk_get_command_buffer_inheritance_as_rendering_resume(cmd_buffer->vk.level, pBeginInfo, gcbiar_data);
if (resume_info) {
radv_CmdBeginRendering(commandBuffer, resume_info);
} else {
render->max_samples = inheritance_info->rasterizationSamples;
render->color_att_count = inheritance_info->colorAttachmentCount;
for (uint32_t i = 0; i < render->color_att_count; i++) {
- render->color_att[i] = (struct radv_attachment) {
+ render->color_att[i] = (struct radv_attachment){
.format = inheritance_info->pColorAttachmentFormats[i],
};
}
assert(inheritance_info->depthAttachmentFormat == VK_FORMAT_UNDEFINED ||
inheritance_info->stencilAttachmentFormat == VK_FORMAT_UNDEFINED ||
- inheritance_info->depthAttachmentFormat ==
- inheritance_info->stencilAttachmentFormat);
- render->ds_att = (struct radv_attachment) { .iview = NULL };
+ inheritance_info->depthAttachmentFormat == inheritance_info->stencilAttachmentFormat);
+ render->ds_att = (struct radv_attachment){.iview = NULL};
if (inheritance_info->depthAttachmentFormat != VK_FORMAT_UNDEFINED)
render->ds_att.format = inheritance_info->depthAttachmentFormat;
if (inheritance_info->stencilAttachmentFormat != VK_FORMAT_UNDEFINED)
render->ds_att.format = inheritance_info->stencilAttachmentFormat;
}
- cmd_buffer->state.inherited_pipeline_statistics =
- pBeginInfo->pInheritanceInfo->pipelineStatistics;
+ cmd_buffer->state.inherited_pipeline_statistics = pBeginInfo->pInheritanceInfo->pipelineStatistics;
- if (cmd_buffer->state.inherited_pipeline_statistics &
- VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT)
+ if (cmd_buffer->state.inherited_pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT)
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
- cmd_buffer->state.inherited_occlusion_queries =
- pBeginInfo->pInheritanceInfo->occlusionQueryEnable;
+ cmd_buffer->state.inherited_occlusion_queries = pBeginInfo->pInheritanceInfo->occlusionQueryEnable;
cmd_buffer->state.inherited_query_control_flags = pBeginInfo->pInheritanceInfo->queryFlags;
if (cmd_buffer->state.inherited_occlusion_queries)
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_OCCLUSION_QUERY;
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding,
- uint32_t bindingCount, const VkBuffer *pBuffers,
- const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes,
+radv_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount,
+ const VkBuffer *pBuffers, const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes,
const VkDeviceSize *pStrides)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VkDeviceSize stride = pStrides ? pStrides[i] : vb[idx].stride;
if (!!cmd_buffer->vertex_binding_buffers[idx] != !!buffer ||
- (buffer && ((vb[idx].offset & 0x3) != (pOffsets[i] & 0x3) ||
- (vb[idx].stride & 0x3) != (stride & 0x3)))) {
+ (buffer && ((vb[idx].offset & 0x3) != (pOffsets[i] & 0x3) || (vb[idx].stride & 0x3) != (stride & 0x3)))) {
misaligned_mask_invalid |= state->bindings_match_attrib ? BITFIELD_BIT(idx) : 0xffffffff;
}
cmd_buffer->state.vbo_misaligned_mask &= ~misaligned_mask_invalid;
}
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER |
- RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT;
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER | RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT;
}
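/* Editor's sketch of the invalidation test above: only the low two bits of
 * offset and stride feed the misalignment classification (an assumption that
 * the hardware distinguishes dword alignment classes), so a rebind keeps the
 * cached mask unless NULL-ness or one of those classes changes. */
#include <stdbool.h>
#include <stdint.h>

static bool misalign_cache_valid(bool had_buffer, bool has_buffer,
                                 uint64_t old_off, uint64_t new_off,
                                 uint64_t old_stride, uint64_t new_stride)
{
   if (had_buffer != has_buffer)
      return false; /* bound <-> unbound: recompute        */
   if (!has_buffer)
      return true;  /* still unbound: nothing to classify  */
   return (old_off & 0x3) == (new_off & 0x3) &&
          (old_stride & 0x3) == (new_stride & 0x3);
}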
static uint32_t
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
- VkIndexType indexType)
+radv_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_buffer, index_buffer, buffer);
cmd_buffer->state.index_va += index_buffer->offset + offset;
int index_size = radv_get_vgt_index_size(vk_to_index_type(indexType));
- cmd_buffer->state.max_index_count =
- (vk_buffer_range(&index_buffer->vk, offset, VK_WHOLE_SIZE)) / index_size;
+ cmd_buffer->state.max_index_count = (vk_buffer_range(&index_buffer->vk, offset, VK_WHOLE_SIZE)) / index_size;
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, index_buffer->bo);
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER;
RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
unsigned dyn_idx = 0;
- const bool no_dynamic_bounds =
- cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_DYNAMIC_BOUNDS;
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, pipelineBindPoint);
+ const bool no_dynamic_bounds = cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_DYNAMIC_BOUNDS;
+ struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, pipelineBindPoint);
for (unsigned i = 0; i < descriptorSetCount; ++i) {
unsigned set_idx = i + firstSet;
/* If the set is already bound we only need to update the
* (potentially changed) dynamic offsets. */
- if (descriptors_state->sets[set_idx] != set ||
- !(descriptors_state->valid & (1u << set_idx))) {
+ if (descriptors_state->sets[set_idx] != set || !(descriptors_state->valid & (1u << set_idx))) {
radv_bind_descriptor_set(cmd_buffer, pipelineBindPoint, set, set_idx);
}
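/* Editor's sketch (toy types): descriptors_state->valid is a bitmask of live
 * set slots, so a set is rebound only when the pointer differs or its valid
 * bit is clear; otherwise only the dynamic offsets need refreshing. */
#include <stdbool.h>
#include <stdint.h>

struct toy_desc_state {
   const void *sets[32];
   uint32_t valid; /* bit i set => sets[i] is currently bound */
};

static bool set_needs_rebind(const struct toy_desc_state *s, unsigned idx,
                             const void *set)
{
   return s->sets[idx] != set || !(s->valid & (1u << idx));
}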
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
- dst[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
+ dst[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
} else if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) {
dst[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
static bool
radv_init_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, struct radv_descriptor_set *set,
- struct radv_descriptor_set_layout *layout,
- VkPipelineBindPoint bind_point)
+ struct radv_descriptor_set_layout *layout, VkPipelineBindPoint bind_point)
{
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
+ struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point);
set->header.size = layout->size;
if (set->header.layout != layout) {
}
void
-radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout,
- uint32_t set, uint32_t descriptorWriteCount,
+radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint pipelineBindPoint,
+ VkPipelineLayout _layout, uint32_t set, uint32_t descriptorWriteCount,
const VkWriteDescriptorSet *pDescriptorWrites)
{
RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
- struct radv_descriptor_set *push_set =
- (struct radv_descriptor_set *)&cmd_buffer->meta_push_descriptors;
+ struct radv_descriptor_set *push_set = (struct radv_descriptor_set *)&cmd_buffer->meta_push_descriptors;
unsigned bo_offset;
assert(set == 0);
push_set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
push_set->header.va += bo_offset;
- radv_cmd_update_descriptor_sets(cmd_buffer->device, cmd_buffer,
- radv_descriptor_set_to_handle(push_set), descriptorWriteCount,
- pDescriptorWrites, 0, NULL);
+ radv_cmd_update_descriptor_sets(cmd_buffer->device, cmd_buffer, radv_descriptor_set_to_handle(push_set),
+ descriptorWriteCount, pDescriptorWrites, 0, NULL);
radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set);
}
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, pipelineBindPoint);
- struct radv_descriptor_set *push_set =
- (struct radv_descriptor_set *)&descriptors_state->push_set.set;
+ struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, pipelineBindPoint);
+ struct radv_descriptor_set *push_set = (struct radv_descriptor_set *)&descriptors_state->push_set.set;
assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
- if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout,
- pipelineBindPoint))
+ if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout, pipelineBindPoint))
return;
/* Check that there are no inline uniform block updates when calling vkCmdPushDescriptorSetKHR()
assert(writeset->descriptorType != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK);
}
- radv_cmd_update_descriptor_sets(cmd_buffer->device, cmd_buffer,
- radv_descriptor_set_to_handle(push_set), descriptorWriteCount,
- pDescriptorWrites, 0, NULL);
+ radv_cmd_update_descriptor_sets(cmd_buffer->device, cmd_buffer, radv_descriptor_set_to_handle(push_set),
+ descriptorWriteCount, pDescriptorWrites, 0, NULL);
radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set);
VKAPI_ATTR void VKAPI_CALL
radv_CmdPushDescriptorSetWithTemplateKHR(VkCommandBuffer commandBuffer,
- VkDescriptorUpdateTemplate descriptorUpdateTemplate,
- VkPipelineLayout _layout, uint32_t set, const void *pData)
+ VkDescriptorUpdateTemplate descriptorUpdateTemplate, VkPipelineLayout _layout,
+ uint32_t set, const void *pData)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate);
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, templ->bind_point);
- struct radv_descriptor_set *push_set =
- (struct radv_descriptor_set *)&descriptors_state->push_set.set;
+ struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, templ->bind_point);
+ struct radv_descriptor_set *push_set = (struct radv_descriptor_set *)&descriptors_state->push_set.set;
assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
- if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout,
- templ->bind_point))
+ if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout, templ->bind_point))
return;
- radv_cmd_update_descriptor_set_with_template(cmd_buffer->device, cmd_buffer, push_set,
- descriptorUpdateTemplate, pData);
+ radv_cmd_update_descriptor_set_with_template(cmd_buffer->device, cmd_buffer, push_set, descriptorUpdateTemplate,
+ pData);
radv_set_descriptor_set(cmd_buffer, templ->bind_point, push_set, set);
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout,
- VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size,
- const void *pValues)
+radv_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags,
+ uint32_t offset, uint32_t size, const void *pValues)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
memcpy(cmd_buffer->push_constants + offset, pValues, size);
radv_emit_mip_change_flush_default(cmd_buffer);
- if (cmd_buffer->qf == RADV_QUEUE_GENERAL ||
- cmd_buffer->qf == RADV_QUEUE_COMPUTE) {
+ if (cmd_buffer->qf == RADV_QUEUE_GENERAL || cmd_buffer->qf == RADV_QUEUE_COMPUTE) {
if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX6)
cmd_buffer->state.flush_bits |=
RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2;
*/
if (cmd_buffer->state.rb_noncoherent_dirty && !can_skip_buffer_l2_flushes(cmd_buffer->device))
cmd_buffer->state.flush_bits |= radv_src_access_flush(
- cmd_buffer,
- VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
- VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
- NULL);
+ cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL);
/* Since NGG streamout uses GDS, we need to make GDS idle when
* we leave the IB, otherwise another process might overwrite
}
static void
-radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer,
- struct radv_compute_pipeline *pipeline)
+radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_compute_pipeline *pipeline)
{
if (pipeline == cmd_buffer->state.emitted_compute_pipeline)
return;
cmd_buffer->compute_scratch_size_per_wave_needed =
MAX2(cmd_buffer->compute_scratch_size_per_wave_needed, pipeline->base.scratch_bytes_per_wave);
- cmd_buffer->compute_scratch_waves_wanted =
- MAX2(cmd_buffer->compute_scratch_waves_wanted, pipeline->base.max_waves);
+ cmd_buffer->compute_scratch_waves_wanted = MAX2(cmd_buffer->compute_scratch_waves_wanted, pipeline->base.max_waves);
if (pipeline->base.type == RADV_PIPELINE_COMPUTE) {
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
- cmd_buffer->state.shaders[MESA_SHADER_COMPUTE]->bo);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->state.shaders[MESA_SHADER_COMPUTE]->bo);
} else {
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->state.rt_prolog->bo);
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
if (!radv_ray_tracing_stage_is_compiled(&rt_pipeline->stages[i]))
continue;
- struct radv_shader *shader = container_of(rt_pipeline->stages[i].shader,
- struct radv_shader, base);
+ struct radv_shader *shader = container_of(rt_pipeline->stages[i].shader, struct radv_shader, base);
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, shader->bo);
}
}
static void
radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
{
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
+ struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point);
descriptors_state->dirty |= descriptors_state->valid;
}
static void
-radv_bind_vs_input_state(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_graphics_pipeline *pipeline)
+radv_bind_vs_input_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_graphics_pipeline *pipeline)
{
const struct radv_shader *vs_shader = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_VERTEX);
const struct radv_vs_input_state *src = &pipeline->vs_input_state;
* two different libraries. Otherwise, if the VS has a prolog, the state is dynamic and there is
* nothing to bind.
*/
- if (!vs_shader || !vs_shader->info.vs.has_prolog ||
- (pipeline->dynamic_states & RADV_DYNAMIC_VERTEX_INPUT))
+ if (!vs_shader || !vs_shader->info.vs.has_prolog || (pipeline->dynamic_states & RADV_DYNAMIC_VERTEX_INPUT))
return;
cmd_buffer->state.dynamic_vs_input = *src;
}
static void
-radv_bind_multisample_state(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_multisample_state *ms)
+radv_bind_multisample_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_multisample_state *ms)
{
if (ms->sample_shading_enable) {
cmd_buffer->state.ms.sample_shading_enable = true;
bool mesh_shading = shader->info.stage == MESA_SHADER_MESH;
const struct radv_userdata_info *loc;
- assert(shader->info.stage == MESA_SHADER_VERTEX ||
- shader->info.stage == MESA_SHADER_TESS_CTRL ||
- shader->info.stage == MESA_SHADER_TESS_EVAL ||
- shader->info.stage == MESA_SHADER_GEOMETRY ||
+ assert(shader->info.stage == MESA_SHADER_VERTEX || shader->info.stage == MESA_SHADER_TESS_CTRL ||
+ shader->info.stage == MESA_SHADER_TESS_EVAL || shader->info.stage == MESA_SHADER_GEOMETRY ||
shader->info.stage == MESA_SHADER_MESH);
if (radv_get_user_sgpr(shader, AC_UD_NGG_PROVOKING_VTX)->sgpr_idx != -1) {
/* Re-emit VRS state because the combiner is different (vertex vs primitive). Re-emit
* primitive topology because the mesh shading pipeline clobbered it.
*/
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE |
- RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
+ cmd_buffer->state.dirty |=
+ RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE | RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
}
cmd_buffer->state.mesh_shading = mesh_shading;
/* Always re-emit patch control points/domain origin when a new pipeline with tessellation is
* bound because a bunch of parameters (user SGPRs, TCS vertices out, ccw, etc) can be different.
*/
- cmd_buffer->state.dirty |=
- RADV_CMD_DIRTY_DYNAMIC_PATCH_CONTROL_POINTS | RADV_CMD_DIRTY_DYNAMIC_TESS_DOMAIN_ORIGIN;
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_PATCH_CONTROL_POINTS | RADV_CMD_DIRTY_DYNAMIC_TESS_DOMAIN_ORIGIN;
}
static void
{
radv_bind_pre_rast_shader(cmd_buffer, gs);
- cmd_buffer->esgs_ring_size_needed =
- MAX2(cmd_buffer->esgs_ring_size_needed, gs->info.gs_ring_info.esgs_ring_size);
- cmd_buffer->gsvs_ring_size_needed =
- MAX2(cmd_buffer->gsvs_ring_size_needed, gs->info.gs_ring_info.gsvs_ring_size);
+ cmd_buffer->esgs_ring_size_needed = MAX2(cmd_buffer->esgs_ring_size_needed, gs->info.gs_ring_info.esgs_ring_size);
+ cmd_buffer->gsvs_ring_size_needed = MAX2(cmd_buffer->gsvs_ring_size_needed, gs->info.gs_ring_info.gsvs_ring_size);
}
static void
/* Re-emit the FS state because the SGPR idx can be different. */
if (radv_get_user_sgpr(ps, AC_UD_PS_STATE)->sgpr_idx != -1) {
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES |
- RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE;
+ cmd_buffer->state.dirty |=
+ RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES | RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE;
}
/* Re-emit the conservative rasterization mode because inner coverage is different. */
if (gfx_level >= GFX10_3 &&
(!previous_ps || previous_ps->info.ps.reads_sample_mask_in != ps->info.ps.reads_sample_mask_in))
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES |
- RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE;
+ cmd_buffer->state.dirty |=
+ RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES | RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE;
if (!previous_ps || radv_ps_can_enable_early_z(previous_ps) != radv_ps_can_enable_early_z(ps))
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE;
/* This function binds/unbinds a shader to the cmdbuffer state. */
static void
-radv_bind_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *shader,
- gl_shader_stage stage)
+radv_bind_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *shader, gl_shader_stage stage)
{
if (!shader) {
cmd_buffer->state.shaders[stage] = NULL;
/* Reset some dynamic states when a shader stage is unbound. */
switch (stage) {
case MESA_SHADER_FRAGMENT:
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_CONSERVATIVE_RAST_MODE |
- RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES |
- RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE |
- RADV_CMD_DIRTY_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE;
+ cmd_buffer->state.dirty |=
+ RADV_CMD_DIRTY_DYNAMIC_CONSERVATIVE_RAST_MODE | RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES |
+ RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE | RADV_CMD_DIRTY_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE;
break;
default:
break;
cmd_buffer->state.active_stages |= mesa_to_vk_shader_stage(stage);
}
-#define RADV_GRAPHICS_STAGES \
+#define RADV_GRAPHICS_STAGES \
(VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT)
VKAPI_ATTR void VKAPI_CALL
-radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
- VkPipeline _pipeline)
+radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline _pipeline)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
return;
radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint);
- radv_bind_shader(cmd_buffer, compute_pipeline->base.shaders[MESA_SHADER_COMPUTE],
- MESA_SHADER_COMPUTE);
+ radv_bind_shader(cmd_buffer, compute_pipeline->base.shaders[MESA_SHADER_COMPUTE], MESA_SHADER_COMPUTE);
cmd_buffer->state.compute_pipeline = compute_pipeline;
cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
return;
radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint);
- radv_bind_shader(cmd_buffer, rt_pipeline->base.base.shaders[MESA_SHADER_INTERSECTION],
- MESA_SHADER_INTERSECTION);
+ radv_bind_shader(cmd_buffer, rt_pipeline->base.base.shaders[MESA_SHADER_INTERSECTION], MESA_SHADER_INTERSECTION);
cmd_buffer->state.rt_prolog = rt_pipeline->base.base.shaders[MESA_SHADER_COMPUTE];
cmd_buffer->state.rt_pipeline = rt_pipeline;
return;
radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint);
- radv_foreach_stage(stage, (cmd_buffer->state.active_stages | graphics_pipeline->active_stages) & RADV_GRAPHICS_STAGES) {
+ radv_foreach_stage(stage,
+ (cmd_buffer->state.active_stages | graphics_pipeline->active_stages) & RADV_GRAPHICS_STAGES)
+ {
radv_bind_shader(cmd_buffer, graphics_pipeline->base.shaders[stage], stage);
}
cmd_buffer->state.gs_copy_shader = graphics_pipeline->base.gs_copy_shader;
- cmd_buffer->state.last_vgt_shader =
- graphics_pipeline->base.shaders[graphics_pipeline->last_vgt_api_stage];
+ cmd_buffer->state.last_vgt_shader = graphics_pipeline->base.shaders[graphics_pipeline->last_vgt_api_stage];
cmd_buffer->state.graphics_pipeline = graphics_pipeline;
cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_SHADERS;
if (cmd_buffer->device->physical_device->rad_info.has_vgt_flush_ngg_legacy_bug &&
- cmd_buffer->state.emitted_graphics_pipeline &&
- cmd_buffer->state.emitted_graphics_pipeline->is_ngg &&
+ cmd_buffer->state.emitted_graphics_pipeline && cmd_buffer->state.emitted_graphics_pipeline->is_ngg &&
!cmd_buffer->state.graphics_pipeline->is_ngg) {
/* Transitioning from NGG to legacy GS requires
* VGT_FLUSH on GFX10 and Navi21. VGT_FLUSH
}
}
- const struct radv_shader *vs =
- radv_get_shader(graphics_pipeline->base.shaders, MESA_SHADER_VERTEX);
+ const struct radv_shader *vs = radv_get_shader(graphics_pipeline->base.shaders, MESA_SHADER_VERTEX);
if (vs) {
/* Re-emit the VS prolog when a new vertex shader is bound. */
if (vs->info.vs.has_prolog) {
break;
}
- cmd_buffer->push_constant_state[vk_to_bind_point(pipelineBindPoint)].size =
- pipeline->push_constant_size;
+ cmd_buffer->push_constant_state[vk_to_bind_point(pipelineBindPoint)].size = pipeline->push_constant_size;
cmd_buffer->push_constant_state[vk_to_bind_point(pipelineBindPoint)].dynamic_offset_count =
pipeline->dynamic_offset_count;
cmd_buffer->descriptors[vk_to_bind_point(pipelineBindPoint)].need_indirect_descriptor_sets =
pipeline->need_indirect_descriptor_sets;
if (cmd_buffer->device->shader_use_invisible_vram)
- cmd_buffer->shader_upload_seq =
- MAX2(cmd_buffer->shader_upload_seq, pipeline->shader_upload_seq);
+ cmd_buffer->shader_upload_seq = MAX2(cmd_buffer->shader_upload_seq, pipeline->shader_upload_seq);
}
VKAPI_ATTR void VKAPI_CALL
if (state->dynamic.vk.vp.viewport_count < total_count)
state->dynamic.vk.vp.viewport_count = total_count;
- memcpy(state->dynamic.vk.vp.viewports + firstViewport, pViewports,
- viewportCount * sizeof(*pViewports));
+ memcpy(state->dynamic.vk.vp.viewports + firstViewport, pViewports, viewportCount * sizeof(*pViewports));
for (unsigned i = 0; i < viewportCount; i++) {
- radv_get_viewport_xform(&pViewports[i],
- state->dynamic.hw_vp.xform[i + firstViewport].scale,
+ radv_get_viewport_xform(&pViewports[i], state->dynamic.hw_vp.xform[i + firstViewport].scale,
state->dynamic.hw_vp.xform[i + firstViewport].translate);
}
if (state->dynamic.vk.vp.scissor_count < total_count)
state->dynamic.vk.vp.scissor_count = total_count;
- memcpy(state->dynamic.vk.vp.scissors + firstScissor, pScissors,
- scissorCount * sizeof(*pScissors));
+ memcpy(state->dynamic.vk.vp.scissors + firstScissor, pScissors, scissorCount * sizeof(*pScissors));
state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR;
}
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor,
- float depthBiasClamp, float depthBiasSlopeFactor)
+radv_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, float depthBiasClamp,
+ float depthBiasSlopeFactor)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_cmd_state *state = &cmd_buffer->state;
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
- uint32_t compareMask)
+radv_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t compareMask)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_cmd_state *state = &cmd_buffer->state;
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
- uint32_t writeMask)
+radv_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t writeMask)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_cmd_state *state = &cmd_buffer->state;
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
- uint32_t reference)
+radv_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t reference)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_cmd_state *state = &cmd_buffer->state;
assert(firstDiscardRectangle < MAX_DISCARD_RECTANGLES);
assert(total_count >= 1 && total_count <= MAX_DISCARD_RECTANGLES);
- typed_memcpy(&state->dynamic.vk.dr.rectangles[firstDiscardRectangle], pDiscardRectangles,
- discardRectangleCount);
+ typed_memcpy(&state->dynamic.vk.dr.rectangles[firstDiscardRectangle], pDiscardRectangles, discardRectangleCount);
state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE;
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdSetSampleLocationsEXT(VkCommandBuffer commandBuffer,
- const VkSampleLocationsInfoEXT *pSampleLocationsInfo)
+radv_CmdSetSampleLocationsEXT(VkCommandBuffer commandBuffer, const VkSampleLocationsInfoEXT *pSampleLocationsInfo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_cmd_state *state = &cmd_buffer->state;
state->dynamic.sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
state->dynamic.sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize;
state->dynamic.sample_location.count = pSampleLocationsInfo->sampleLocationsCount;
- typed_memcpy(&state->dynamic.sample_location.locations[0],
- pSampleLocationsInfo->pSampleLocations, pSampleLocationsInfo->sampleLocationsCount);
+ typed_memcpy(&state->dynamic.sample_location.locations[0], pSampleLocationsInfo->pSampleLocations,
+ pSampleLocationsInfo->sampleLocationsCount);
state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdSetLineStippleEXT(VkCommandBuffer commandBuffer, uint32_t lineStippleFactor,
- uint16_t lineStipplePattern)
+radv_CmdSetLineStippleEXT(VkCommandBuffer commandBuffer, uint32_t lineStippleFactor, uint16_t lineStipplePattern)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_cmd_state *state = &cmd_buffer->state;
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdSetViewportWithCount(VkCommandBuffer commandBuffer, uint32_t viewportCount,
- const VkViewport *pViewports)
+radv_CmdSetViewportWithCount(VkCommandBuffer commandBuffer, uint32_t viewportCount, const VkViewport *pViewports)
{
radv_CmdSetViewport(commandBuffer, 0, viewportCount, pViewports);
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdSetScissorWithCount(VkCommandBuffer commandBuffer, uint32_t scissorCount,
- const VkRect2D *pScissors)
+radv_CmdSetScissorWithCount(VkCommandBuffer commandBuffer, uint32_t scissorCount, const VkRect2D *pScissors)
{
radv_CmdSetScissor(commandBuffer, 0, scissorCount, pScissors);
}
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdSetStencilOp(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
- VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp,
- VkCompareOp compareOp)
+radv_CmdSetStencilOp(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, VkStencilOp failOp, VkStencilOp passOp,
+ VkStencilOp depthFailOp, VkCompareOp compareOp)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_cmd_state *state = &cmd_buffer->state;
if (!(vtx_info->has_hw_format & BITFIELD_BIT(vtx_info->num_channels - 1)))
vs_state->nontrivial_formats |= BITFIELD_BIT(loc);
- if ((chip == GFX6 || chip >= GFX10) &&
- state->vbo_bound_mask & BITFIELD_BIT(attrib->binding)) {
+ if ((chip == GFX6 || chip >= GFX10) && state->vbo_bound_mask & BITFIELD_BIT(attrib->binding)) {
if (binding->stride & align_req_minus_1) {
state->vbo_misaligned_mask |= BITFIELD_BIT(loc);
} else if ((cmd_buffer->vertex_bindings[attrib->binding].offset + vs_state->offsets[loc]) &
}
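/* Editor's sketch of the checks above: an attribute falls off the fast path
 * when either the binding stride or the accumulated offset violates the
 * format's alignment requirement; align_req_minus_1 is that requirement minus
 * one, so a simple AND detects the violation. */
#include <stdbool.h>
#include <stdint.h>

static bool attrib_misaligned(uint32_t stride, uint32_t binding_offset,
                              uint32_t attrib_offset, uint32_t align_req_minus_1)
{
   return (stride & align_req_minus_1) != 0 ||
          ((binding_offset + attrib_offset) & align_req_minus_1) != 0;
}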
VKAPI_ATTR void VKAPI_CALL
-radv_CmdSetTessellationDomainOriginEXT(VkCommandBuffer commandBuffer,
- VkTessellationDomainOrigin domainOrigin)
+radv_CmdSetTessellationDomainOriginEXT(VkCommandBuffer commandBuffer, VkTessellationDomainOrigin domainOrigin)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_cmd_state *state = &cmd_buffer->state;
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdSetSampleMaskEXT(VkCommandBuffer commandBuffer, VkSampleCountFlagBits samples,
- const VkSampleMask *pSampleMask)
+radv_CmdSetSampleMaskEXT(VkCommandBuffer commandBuffer, VkSampleCountFlagBits samples, const VkSampleMask *pSampleMask)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_cmd_state *state = &cmd_buffer->state;
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdSetProvokingVertexModeEXT(VkCommandBuffer commandBuffer,
- VkProvokingVertexModeEXT provokingVertexMode)
+radv_CmdSetProvokingVertexModeEXT(VkCommandBuffer commandBuffer, VkProvokingVertexModeEXT provokingVertexMode)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_cmd_state *state = &cmd_buffer->state;
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdSetColorWriteMaskEXT(VkCommandBuffer commandBuffer, uint32_t firstAttachment,
- uint32_t attachmentCount, const VkColorComponentFlags *pColorWriteMasks)
+radv_CmdSetColorWriteMaskEXT(VkCommandBuffer commandBuffer, uint32_t firstAttachment, uint32_t attachmentCount,
+ const VkColorComponentFlags *pColorWriteMasks)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_cmd_state *state = &cmd_buffer->state;
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdSetColorBlendEnableEXT(VkCommandBuffer commandBuffer, uint32_t firstAttachment,
- uint32_t attachmentCount, const VkBool32* pColorBlendEnables)
+radv_CmdSetColorBlendEnableEXT(VkCommandBuffer commandBuffer, uint32_t firstAttachment, uint32_t attachmentCount,
+ const VkBool32 *pColorBlendEnables)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_cmd_state *state = &cmd_buffer->state;
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdSetRasterizationSamplesEXT(VkCommandBuffer commandBuffer,
- VkSampleCountFlagBits rasterizationSamples)
+radv_CmdSetRasterizationSamplesEXT(VkCommandBuffer commandBuffer, VkSampleCountFlagBits rasterizationSamples)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_cmd_state *state = &cmd_buffer->state;
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdSetLineRasterizationModeEXT(VkCommandBuffer commandBuffer,
- VkLineRasterizationModeEXT lineRasterizationMode)
+radv_CmdSetLineRasterizationModeEXT(VkCommandBuffer commandBuffer, VkLineRasterizationModeEXT lineRasterizationMode)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_cmd_state *state = &cmd_buffer->state;
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdSetColorBlendEquationEXT(VkCommandBuffer commandBuffer, uint32_t firstAttachment,
- uint32_t attachmentCount,
+radv_CmdSetColorBlendEquationEXT(VkCommandBuffer commandBuffer, uint32_t firstAttachment, uint32_t attachmentCount,
const VkColorBlendEquationEXT *pColorBlendEquations)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
for (uint32_t i = 0; i < attachmentCount; i++) {
unsigned idx = firstAttachment + i;
- state->dynamic.vk.cb.attachments[idx].src_color_blend_factor =
- pColorBlendEquations[i].srcColorBlendFactor;
- state->dynamic.vk.cb.attachments[idx].dst_color_blend_factor =
- pColorBlendEquations[i].dstColorBlendFactor;
- state->dynamic.vk.cb.attachments[idx].color_blend_op =
- pColorBlendEquations[i].colorBlendOp;
- state->dynamic.vk.cb.attachments[idx].src_alpha_blend_factor =
- pColorBlendEquations[i].srcAlphaBlendFactor;
- state->dynamic.vk.cb.attachments[idx].dst_alpha_blend_factor =
- pColorBlendEquations[i].dstAlphaBlendFactor;
- state->dynamic.vk.cb.attachments[idx].alpha_blend_op =
- pColorBlendEquations[i].alphaBlendOp;
+ state->dynamic.vk.cb.attachments[idx].src_color_blend_factor = pColorBlendEquations[i].srcColorBlendFactor;
+ state->dynamic.vk.cb.attachments[idx].dst_color_blend_factor = pColorBlendEquations[i].dstColorBlendFactor;
+ state->dynamic.vk.cb.attachments[idx].color_blend_op = pColorBlendEquations[i].colorBlendOp;
+ state->dynamic.vk.cb.attachments[idx].src_alpha_blend_factor = pColorBlendEquations[i].srcAlphaBlendFactor;
+ state->dynamic.vk.cb.attachments[idx].dst_alpha_blend_factor = pColorBlendEquations[i].dstAlphaBlendFactor;
+ state->dynamic.vk.cb.attachments[idx].alpha_blend_op = pColorBlendEquations[i].alphaBlendOp;
}
state->dirty |= RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_EQUATION;
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdSetDiscardRectangleModeEXT(VkCommandBuffer commandBuffer,
- VkDiscardRectangleModeEXT discardRectangleMode)
+radv_CmdSetDiscardRectangleModeEXT(VkCommandBuffer commandBuffer, VkDiscardRectangleModeEXT discardRectangleMode)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_cmd_state *state = &cmd_buffer->state;
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdSetAttachmentFeedbackLoopEnableEXT(VkCommandBuffer commandBuffer,
- VkImageAspectFlags aspectMask)
+radv_CmdSetAttachmentFeedbackLoopEnableEXT(VkCommandBuffer commandBuffer, VkImageAspectFlags aspectMask)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_cmd_state *state = &cmd_buffer->state;
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount,
- const VkCommandBuffer *pCmdBuffers)
+radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, const VkCommandBuffer *pCmdBuffers)
{
RADV_FROM_HANDLE(radv_cmd_buffer, primary, commandBuffer);
* DRAW_{INDEX}_INDIRECT_{MULTI} on GFX6-7 because it's illegal and hangs the GPU.
*/
const bool allow_ib2 =
- !secondary->state.uses_draw_indirect ||
- secondary->device->physical_device->rad_info.gfx_level >= GFX8;
+ !secondary->state.uses_draw_indirect || secondary->device->physical_device->rad_info.gfx_level >= GFX8;
primary->scratch_size_per_wave_needed =
MAX2(primary->scratch_size_per_wave_needed, secondary->scratch_size_per_wave_needed);
- primary->scratch_waves_wanted =
- MAX2(primary->scratch_waves_wanted, secondary->scratch_waves_wanted);
+ primary->scratch_waves_wanted = MAX2(primary->scratch_waves_wanted, secondary->scratch_waves_wanted);
primary->compute_scratch_size_per_wave_needed =
- MAX2(primary->compute_scratch_size_per_wave_needed,
- secondary->compute_scratch_size_per_wave_needed);
+ MAX2(primary->compute_scratch_size_per_wave_needed, secondary->compute_scratch_size_per_wave_needed);
primary->compute_scratch_waves_wanted =
MAX2(primary->compute_scratch_waves_wanted, secondary->compute_scratch_waves_wanted);
/* After executing commands from secondary buffers we have to dirty
* some states.
*/
- primary->state.dirty |= RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_INDEX_BUFFER |
- RADV_CMD_DIRTY_GUARDBAND | RADV_CMD_DIRTY_DYNAMIC_ALL |
- RADV_CMD_DIRTY_NGG_QUERY | RADV_CMD_DIRTY_OCCLUSION_QUERY;
+ primary->state.dirty |= RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_GUARDBAND |
+ RADV_CMD_DIRTY_DYNAMIC_ALL | RADV_CMD_DIRTY_NGG_QUERY | RADV_CMD_DIRTY_OCCLUSION_QUERY;
radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_GRAPHICS);
radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_COMPUTE);
attachment_initial_layout(const VkRenderingAttachmentInfo *att)
{
const VkRenderingAttachmentInitialLayoutInfoMESA *layout_info =
- vk_find_struct_const(att->pNext,
- RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA);
+ vk_find_struct_const(att->pNext, RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA);
if (layout_info != NULL)
return layout_info->initialLayout;
const struct VkSampleLocationsInfoEXT *sample_locs_info =
vk_find_struct_const(pRenderingInfo->pNext, SAMPLE_LOCATIONS_INFO_EXT);
- struct radv_sample_locations_state sample_locations = { .count = 0, };
+ struct radv_sample_locations_state sample_locations = {
+ .count = 0,
+ };
if (sample_locs_info) {
- sample_locations = (struct radv_sample_locations_state) {
+ sample_locations = (struct radv_sample_locations_state){
.per_pixel = sample_locs_info->sampleLocationsPerPixel,
.grid_size = sample_locs_info->sampleLocationGridSize,
.count = sample_locs_info->sampleLocationsCount,
};
- typed_memcpy(sample_locations.locations,
- sample_locs_info->pSampleLocations,
+ typed_memcpy(sample_locations.locations, sample_locs_info->pSampleLocations,
sample_locs_info->sampleLocationsCount);
}
uint32_t color_samples = 0, ds_samples = 0;
struct radv_attachment color_att[MAX_RTS];
for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) {
- const VkRenderingAttachmentInfo *att_info =
- &pRenderingInfo->pColorAttachments[i];
+ const VkRenderingAttachmentInfo *att_info = &pRenderingInfo->pColorAttachments[i];
- color_att[i] = (struct radv_attachment) { .iview = NULL };
+ color_att[i] = (struct radv_attachment){.iview = NULL};
if (att_info->imageView == VK_NULL_HANDLE)
continue;
color_att[i].layout = att_info->imageLayout;
radv_initialise_color_surface(cmd_buffer->device, &color_att[i].cb, iview);
- if (att_info->resolveMode != VK_RESOLVE_MODE_NONE &&
- att_info->resolveImageView != VK_NULL_HANDLE) {
+ if (att_info->resolveMode != VK_RESOLVE_MODE_NONE && att_info->resolveImageView != VK_NULL_HANDLE) {
color_att[i].resolve_mode = att_info->resolveMode;
- color_att[i].resolve_iview =
- radv_image_view_from_handle(att_info->resolveImageView);
+ color_att[i].resolve_iview = radv_image_view_from_handle(att_info->resolveImageView);
color_att[i].resolve_layout = att_info->resolveImageLayout;
}
VkImageLayout initial_layout = attachment_initial_layout(att_info);
if (initial_layout != color_att[i].layout) {
assert(!(pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT));
- radv_handle_rendering_image_transition(cmd_buffer, color_att[i].iview,
- pRenderingInfo->layerCount,
- pRenderingInfo->viewMask,
- initial_layout, VK_IMAGE_LAYOUT_UNDEFINED,
- color_att[i].layout, VK_IMAGE_LAYOUT_UNDEFINED,
- &sample_locations);
+ radv_handle_rendering_image_transition(cmd_buffer, color_att[i].iview, pRenderingInfo->layerCount,
+ pRenderingInfo->viewMask, initial_layout, VK_IMAGE_LAYOUT_UNDEFINED,
+ color_att[i].layout, VK_IMAGE_LAYOUT_UNDEFINED, &sample_locations);
}
}
- struct radv_attachment ds_att = { .iview = NULL };
+ struct radv_attachment ds_att = {.iview = NULL};
const VkRenderingAttachmentInfo *d_att_info = pRenderingInfo->pDepthAttachment;
const VkRenderingAttachmentInfo *s_att_info = pRenderingInfo->pStencilAttachment;
if ((d_att_info != NULL && d_att_info->imageView != VK_NULL_HANDLE) ||
initial_depth_layout = attachment_initial_layout(d_att_info);
ds_att.layout = d_att_info->imageLayout;
- if (d_att_info->resolveMode != VK_RESOLVE_MODE_NONE &&
- d_att_info->resolveImageView != VK_NULL_HANDLE) {
+ if (d_att_info->resolveMode != VK_RESOLVE_MODE_NONE && d_att_info->resolveImageView != VK_NULL_HANDLE) {
d_res_iview = radv_image_view_from_handle(d_att_info->resolveImageView);
ds_att.resolve_mode = d_att_info->resolveMode;
ds_att.resolve_layout = d_att_info->resolveImageLayout;
initial_stencil_layout = attachment_initial_layout(s_att_info);
ds_att.stencil_layout = s_att_info->imageLayout;
- if (s_att_info->resolveMode != VK_RESOLVE_MODE_NONE &&
- s_att_info->resolveImageView != VK_NULL_HANDLE) {
+ if (s_att_info->resolveMode != VK_RESOLVE_MODE_NONE && s_att_info->resolveImageView != VK_NULL_HANDLE) {
s_res_iview = radv_image_view_from_handle(s_att_info->resolveImageView);
ds_att.stencil_resolve_mode = s_att_info->resolveMode;
ds_att.stencil_resolve_layout = s_att_info->resolveImageLayout;
}
assert(d_iview == NULL || s_iview == NULL || d_iview == s_iview);
- ds_att.iview = d_iview ? d_iview : s_iview,
- ds_att.format = ds_att.iview->vk.format;
+ ds_att.iview = d_iview ? d_iview : s_iview;
+ ds_att.format = ds_att.iview->vk.format;
radv_initialise_ds_surface(cmd_buffer->device, &ds_att.ds, ds_att.iview);
assert(d_res_iview == NULL || s_res_iview == NULL || d_res_iview == s_res_iview);
ds_samples = ds_att.iview->vk.image->samples;
- if (initial_depth_layout != ds_att.layout ||
- initial_stencil_layout != ds_att.stencil_layout) {
+ if (initial_depth_layout != ds_att.layout || initial_stencil_layout != ds_att.stencil_layout) {
assert(!(pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT));
- radv_handle_rendering_image_transition(cmd_buffer, ds_att.iview,
- pRenderingInfo->layerCount,
- pRenderingInfo->viewMask,
- initial_depth_layout, initial_stencil_layout,
- ds_att.layout, ds_att.stencil_layout,
- &sample_locations);
+ radv_handle_rendering_image_transition(cmd_buffer, ds_att.iview, pRenderingInfo->layerCount,
+ pRenderingInfo->viewMask, initial_depth_layout, initial_stencil_layout,
+ ds_att.layout, ds_att.stencil_layout, &sample_locations);
}
}
if (cmd_buffer->vk.render_pass)
radv_describe_barrier_end(cmd_buffer);
const VkRenderingFragmentShadingRateAttachmentInfoKHR *fsr_info =
- vk_find_struct_const(pRenderingInfo->pNext,
- RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR);
- struct radv_attachment vrs_att = { .iview = NULL };
- VkExtent2D vrs_texel_size = { .width = 0 };
+ vk_find_struct_const(pRenderingInfo->pNext, RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR);
+ struct radv_attachment vrs_att = {.iview = NULL};
+ VkExtent2D vrs_texel_size = {.width = 0};
if (fsr_info && fsr_info->imageView) {
VK_FROM_HANDLE(radv_image_view, iview, fsr_info->imageView);
- vrs_att = (struct radv_attachment) {
+ vrs_att = (struct radv_attachment){
.format = iview->vk.format,
.iview = iview,
.layout = fsr_info->imageLayout,
render->area.offset.y + render->area.extent.height <= ds_image->vk.extent.height);
/* Copy the VRS rates to the HTILE buffer. */
- radv_copy_vrs_htile(cmd_buffer, render->vrs_att.iview->image, &render->area, ds_image,
- &htile_buffer, true);
+ radv_copy_vrs_htile(cmd_buffer, render->vrs_att.iview->image, &render->area, ds_image, &htile_buffer, true);
radv_buffer_finish(&htile_buffer);
} else {
struct radv_image *ds_image = radv_cmd_buffer_get_vrs_image(cmd_buffer);
if (ds_image && render->area.offset.x < ds_image->vk.extent.width &&
- render->area.offset.y < ds_image->vk.extent.height) {
+ render->area.offset.y < ds_image->vk.extent.height) {
/* HTILE buffer */
struct radv_buffer *htile_buffer = cmd_buffer->device->vrs.buffer;
area.extent.height = MIN2(area.extent.height, ds_image->vk.extent.height - area.offset.y);
/* Copy the VRS rates to the HTILE buffer. */
- radv_copy_vrs_htile(cmd_buffer, render->vrs_att.iview->image, &area, ds_image,
- htile_buffer, false);
+ radv_copy_vrs_htile(cmd_buffer, render->vrs_att.iview->image, &area, ds_image, htile_buffer, false);
}
}
}
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 6);
radeon_set_context_reg(cmd_buffer->cs, R_028204_PA_SC_WINDOW_SCISSOR_TL,
- S_028204_TL_X(render->area.offset.x) |
- S_028204_TL_Y(render->area.offset.y));
+ S_028204_TL_X(render->area.offset.x) | S_028204_TL_Y(render->area.offset.y));
radeon_set_context_reg(cmd_buffer->cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
S_028208_BR_X(render->area.offset.x + render->area.extent.width) |
- S_028208_BR_Y(render->area.offset.y + render->area.extent.height));
+ S_028208_BR_Y(render->area.offset.y + render->area.extent.height));
if (!(pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT))
radv_cmd_buffer_clear_rendering(cmd_buffer, pRenderingInfo);
}
static void
-radv_emit_view_index_per_stage(struct radeon_cmdbuf *cs, const struct radv_shader *shader,
- uint32_t base_reg, unsigned index)
+radv_emit_view_index_per_stage(struct radeon_cmdbuf *cs, const struct radv_shader *shader, uint32_t base_reg,
+ unsigned index)
{
const struct radv_userdata_info *loc = radv_get_user_sgpr(shader, AC_UD_VIEW_INDEX);
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
- radv_foreach_stage(stage, cmd_buffer->state.active_stages & ~VK_SHADER_STAGE_TASK_BIT_EXT) {
+ radv_foreach_stage(stage, cmd_buffer->state.active_stages & ~VK_SHADER_STAGE_TASK_BIT_EXT)
+ {
const struct radv_shader *shader = radv_get_shader(cmd_buffer->state.shaders, stage);
radv_emit_view_index_per_stage(cs, shader, shader->info.user_data_0, index);
}
if (cmd_buffer->state.gs_copy_shader) {
- radv_emit_view_index_per_stage(cs, cmd_buffer->state.gs_copy_shader,
- R_00B130_SPI_SHADER_USER_DATA_VS_0, index);
+ radv_emit_view_index_per_stage(cs, cmd_buffer->state.gs_copy_shader, R_00B130_SPI_SHADER_USER_DATA_VS_0, index);
}
if (cmd_buffer->state.active_stages & VK_SHADER_STAGE_TASK_BIT_EXT) {
- radv_emit_view_index_per_stage(cmd_buffer->gang.cs,
- cmd_buffer->state.shaders[MESA_SHADER_TASK],
- cmd_buffer->state.shaders[MESA_SHADER_TASK]->info.user_data_0,
- index);
+ radv_emit_view_index_per_stage(cmd_buffer->gang.cs, cmd_buffer->state.shaders[MESA_SHADER_TASK],
+ cmd_buffer->state.shaders[MESA_SHADER_TASK]->info.user_data_0, index);
}
}
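/* Multiview note: draw/dispatch packets are replayed once per bit set in
 * render.view_mask, and this helper rewrites the view-index user SGPR for
 * every active stage first, including the GS copy shader's VS registers
 * and, when a task shader is bound, its copy on the ACE command stream. */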
* space in the upload BO and emit some packets to invert the condition.
*/
static void
-radv_cs_emit_compute_predication(struct radv_cmd_state *state, struct radeon_cmdbuf *cs,
- uint64_t inv_va, bool *inv_emitted, unsigned dwords)
+radv_cs_emit_compute_predication(struct radv_cmd_state *state, struct radeon_cmdbuf *cs, uint64_t inv_va,
+ bool *inv_emitted, unsigned dwords)
{
if (!state->predicating)
return;
/* Write 1 to the inverted predication VA. */
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
- COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs,
+ COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM);
radeon_emit(cs, 1);
radeon_emit(cs, 0);
radeon_emit(cs, inv_va);
/* Write 0 to the new predication VA (when the API condition != 0) */
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
- COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs,
+ COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM);
radeon_emit(cs, 0);
radeon_emit(cs, 0);
radeon_emit(cs, inv_va);
radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
- radeon_emit(cs, 0); /* Cache policy */
+ radeon_emit(cs, 0); /* Cache policy */
radeon_emit(cs, dwords); /* Size of the predicated packet(s) in DWORDs. */
}
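/* Sketch of the inversion above (inferred from the visible emits, parts of
 * the function are elided):
 *   *inv_va = 1;                      // default: execute (condition == 0)
 *   if (*user_va != 0) *inv_va = 0;   // guarded COPY_DATA via COND_EXEC
 *   COND_EXEC(va, dwords);            // skips the predicated packets
 * i.e. the predicate polarity is rebuilt by hand for the compute path,
 * which has no SET_PREDICATION equivalent. */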
static void
-radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t vertex_count,
- uint32_t use_opaque)
+radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t vertex_count, uint32_t use_opaque)
{
radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, cmd_buffer->state.predicating));
radeon_emit(cmd_buffer->cs, vertex_count);
* Hardware uses this information to return 0 for out-of-bounds reads.
*/
static void
-radv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t index_va,
- uint32_t max_index_count, uint32_t index_count, bool not_eop)
+radv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t index_va, uint32_t max_index_count,
+ uint32_t index_count, bool not_eop)
{
radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_2, 4, cmd_buffer->state.predicating));
radeon_emit(cmd_buffer->cs, max_index_count);
/* MUST inline this function to avoid massive perf loss in drawoverhead */
ALWAYS_INLINE static void
-radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool indexed,
- uint32_t draw_count, uint64_t count_va, uint32_t stride)
+radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool indexed, uint32_t draw_count,
+ uint64_t count_va, uint32_t stride)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
const unsigned di_src_sel = indexed ? V_0287F0_DI_SRC_SEL_DMA : V_0287F0_DI_SRC_SEL_AUTO_INDEX;
draw_id_reg = ((base_reg + mesh * 12 + 4) - SI_SH_REG_OFFSET) >> 2;
if (draw_count == 1 && !count_va && !draw_id_enable) {
- radeon_emit(cs,
- PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT : PKT3_DRAW_INDIRECT, 3, predicating));
+ radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT : PKT3_DRAW_INDIRECT, 3, predicating));
radeon_emit(cs, 0);
radeon_emit(cs, vertex_offset_reg);
radeon_emit(cs, start_instance_reg);
radeon_emit(cs, di_src_sel);
} else {
- radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI : PKT3_DRAW_INDIRECT_MULTI, 8,
- predicating));
+ radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI : PKT3_DRAW_INDIRECT_MULTI, 8, predicating));
radeon_emit(cs, 0);
radeon_emit(cs, vertex_offset_reg);
radeon_emit(cs, start_instance_reg);
- radeon_emit(cs, draw_id_reg | S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) |
- S_2C3_COUNT_INDIRECT_ENABLE(!!count_va));
+ radeon_emit(cs, draw_id_reg | S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) | S_2C3_COUNT_INDIRECT_ENABLE(!!count_va));
radeon_emit(cs, draw_count); /* count */
radeon_emit(cs, count_va); /* count_addr */
radeon_emit(cs, count_va >> 32);
}
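/* Reading of the two forms above: a single draw with no count buffer and no
 * draw id takes the short DRAW_{INDEX_}INDIRECT packet; everything else uses
 * DRAW_{INDEX_}INDIRECT_MULTI, with DRAW_INDEX_ENABLE and
 * COUNT_INDIRECT_ENABLE packed alongside the user-SGPR register offsets. */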
ALWAYS_INLINE static void
-radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t draw_count,
- uint64_t count_va, uint32_t stride)
+radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t draw_count, uint64_t count_va,
+ uint32_t stride)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
uint32_t base_reg = cmd_buffer->state.vtx_base_sgpr;
radeon_emit(cs, 0); /* data_offset */
radeon_emit(cs, S_4C1_XYZ_DIM_REG(xyz_dim_reg) | S_4C1_DRAW_INDEX_REG(draw_id_reg));
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11)
- radeon_emit(cs, S_4C2_DRAW_INDEX_ENABLE(draw_id_enable) |
- S_4C2_COUNT_INDIRECT_ENABLE(!!count_va) |
+ radeon_emit(cs, S_4C2_DRAW_INDEX_ENABLE(draw_id_enable) | S_4C2_COUNT_INDIRECT_ENABLE(!!count_va) |
S_4C2_XYZ_DIM_ENABLE(xyz_dim_enable) | S_4C2_MODE1_ENABLE(mode1_enable));
else
- radeon_emit(
- cs, S_4C2_DRAW_INDEX_ENABLE(draw_id_enable) | S_4C2_COUNT_INDIRECT_ENABLE(!!count_va));
+ radeon_emit(cs, S_4C2_DRAW_INDEX_ENABLE(draw_id_enable) | S_4C2_COUNT_INDIRECT_ENABLE(!!count_va));
radeon_emit(cs, draw_count);
radeon_emit(cs, count_va & 0xFFFFFFFF);
radeon_emit(cs, count_va >> 32);
}
ALWAYS_INLINE static void
-radv_cs_emit_dispatch_taskmesh_direct_ace_packet(struct radv_cmd_buffer *cmd_buffer,
- const uint32_t x, const uint32_t y,
- const uint32_t z)
+radv_cs_emit_dispatch_taskmesh_direct_ace_packet(struct radv_cmd_buffer *cmd_buffer, const uint32_t x,
+ const uint32_t y, const uint32_t z)
{
struct radv_shader *task_shader = cmd_buffer->state.shaders[MESA_SHADER_TASK];
struct radeon_cmdbuf *cs = cmd_buffer->gang.cs;
const bool predicating = cmd_buffer->state.predicating;
- const uint32_t dispatch_initiator = cmd_buffer->device->dispatch_initiator_task |
- S_00B800_CS_W32_EN(task_shader->info.wave_size == 32);
+ const uint32_t dispatch_initiator =
+ cmd_buffer->device->dispatch_initiator_task | S_00B800_CS_W32_EN(task_shader->info.wave_size == 32);
- const struct radv_userdata_info *ring_entry_loc =
- radv_get_user_sgpr(task_shader, AC_UD_TASK_RING_ENTRY);
+ const struct radv_userdata_info *ring_entry_loc = radv_get_user_sgpr(task_shader, AC_UD_TASK_RING_ENTRY);
assert(ring_entry_loc && ring_entry_loc->sgpr_idx != -1 && ring_entry_loc->num_sgprs == 1);
- uint32_t ring_entry_reg =
- (R_00B900_COMPUTE_USER_DATA_0 + ring_entry_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2;
+ uint32_t ring_entry_reg = (R_00B900_COMPUTE_USER_DATA_0 + ring_entry_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2;
radeon_emit(cs, PKT3(PKT3_DISPATCH_TASKMESH_DIRECT_ACE, 4, predicating) | PKT3_SHADER_TYPE_S(1));
radeon_emit(cs, x);
}
ALWAYS_INLINE static void
-radv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(struct radv_cmd_buffer *cmd_buffer,
- uint64_t data_va, uint32_t draw_count,
- uint64_t count_va, uint32_t stride)
+radv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t data_va,
+ uint32_t draw_count, uint64_t count_va, uint32_t stride)
{
assert((data_va & 0x03) == 0);
assert((count_va & 0x03) == 0);
const uint32_t xyz_dim_enable = task_shader->info.cs.uses_grid_size;
const uint32_t draw_id_enable = task_shader->info.vs.needs_draw_id;
- const uint32_t dispatch_initiator = cmd_buffer->device->dispatch_initiator_task |
- S_00B800_CS_W32_EN(task_shader->info.wave_size == 32);
+ const uint32_t dispatch_initiator =
+ cmd_buffer->device->dispatch_initiator_task | S_00B800_CS_W32_EN(task_shader->info.wave_size == 32);
- const struct radv_userdata_info *ring_entry_loc =
- radv_get_user_sgpr(task_shader, AC_UD_TASK_RING_ENTRY);
- const struct radv_userdata_info *xyz_dim_loc =
- radv_get_user_sgpr(task_shader, AC_UD_CS_GRID_SIZE);
- const struct radv_userdata_info *draw_id_loc =
- radv_get_user_sgpr(task_shader, AC_UD_CS_TASK_DRAW_ID);
+ const struct radv_userdata_info *ring_entry_loc = radv_get_user_sgpr(task_shader, AC_UD_TASK_RING_ENTRY);
+ const struct radv_userdata_info *xyz_dim_loc = radv_get_user_sgpr(task_shader, AC_UD_CS_GRID_SIZE);
+ const struct radv_userdata_info *draw_id_loc = radv_get_user_sgpr(task_shader, AC_UD_CS_TASK_DRAW_ID);
assert(ring_entry_loc->sgpr_idx != -1 && ring_entry_loc->num_sgprs == 1);
assert(!xyz_dim_enable || (xyz_dim_loc->sgpr_idx != -1 && xyz_dim_loc->num_sgprs == 3));
const uint32_t ring_entry_reg =
(R_00B900_COMPUTE_USER_DATA_0 + ring_entry_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2;
const uint32_t xyz_dim_reg =
- !xyz_dim_enable
- ? 0
- : (R_00B900_COMPUTE_USER_DATA_0 + xyz_dim_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2;
+ !xyz_dim_enable ? 0 : (R_00B900_COMPUTE_USER_DATA_0 + xyz_dim_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2;
const uint32_t draw_id_reg =
- !draw_id_enable
- ? 0
- : (R_00B900_COMPUTE_USER_DATA_0 + draw_id_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2;
+ !draw_id_enable ? 0 : (R_00B900_COMPUTE_USER_DATA_0 + draw_id_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2;
radeon_emit(cs, PKT3(PKT3_DISPATCH_TASKMESH_INDIRECT_MULTI_ACE, 9, 0) | PKT3_SHADER_TYPE_S(1));
radeon_emit(cs, data_va);
radeon_emit(cs, data_va >> 32);
radeon_emit(cs, S_AD2_RING_ENTRY_REG(ring_entry_reg));
- radeon_emit(cs, S_AD3_COUNT_INDIRECT_ENABLE(!!count_va) |
- S_AD3_DRAW_INDEX_ENABLE(draw_id_enable) |
+ radeon_emit(cs, S_AD3_COUNT_INDIRECT_ENABLE(!!count_va) | S_AD3_DRAW_INDEX_ENABLE(draw_id_enable) |
S_AD3_XYZ_DIM_ENABLE(xyz_dim_enable) | S_AD3_DRAW_INDEX_REG(draw_id_reg));
radeon_emit(cs, S_AD4_XYZ_DIM_REG(xyz_dim_reg));
radeon_emit(cs, draw_count);
uint32_t ring_entry_reg = ((base_reg + ring_entry_loc->sgpr_idx * 4) - SI_SH_REG_OFFSET) >> 2;
uint32_t xyz_dim_en = 1; /* TODO: disable XYZ_DIM when unneeded */
uint32_t mode1_en = 1; /* legacy fast launch mode */
- uint32_t linear_dispatch_en =
- cmd_buffer->state.shaders[MESA_SHADER_TASK]->info.cs.linear_taskmesh_dispatch;
+ uint32_t linear_dispatch_en = cmd_buffer->state.shaders[MESA_SHADER_TASK]->info.cs.linear_taskmesh_dispatch;
radeon_emit(cs, PKT3(PKT3_DISPATCH_TASKMESH_GFX, 2, predicating) | PKT3_RESET_FILTER_CAM(1));
radeon_emit(cs, S_4D0_RING_ENTRY_REG(ring_entry_reg) | S_4D0_XYZ_DIM_REG(xyz_dim_reg));
}
ALWAYS_INLINE static void
-radv_emit_userdata_vertex_internal(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info, const uint32_t vertex_offset)
+radv_emit_userdata_vertex_internal(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info,
+ const uint32_t vertex_offset)
{
struct radv_cmd_state *state = &cmd_buffer->state;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
state->last_vertex_offset = vertex_offset;
if (drawid)
radeon_emit(cs, drawid);
-
}
ALWAYS_INLINE static void
-radv_emit_userdata_mesh(struct radv_cmd_buffer *cmd_buffer,
- const uint32_t x, const uint32_t y, const uint32_t z)
+radv_emit_userdata_mesh(struct radv_cmd_buffer *cmd_buffer, const uint32_t x, const uint32_t y, const uint32_t z)
{
struct radv_cmd_state *state = &cmd_buffer->state;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
}
ALWAYS_INLINE static void
-radv_emit_userdata_task(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z,
- uint32_t draw_id)
+radv_emit_userdata_task(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z, uint32_t draw_id)
{
struct radv_shader *task_shader = cmd_buffer->state.shaders[MESA_SHADER_TASK];
struct radeon_cmdbuf *cs = cmd_buffer->gang.cs;
const struct radv_userdata_info *xyz_loc = radv_get_user_sgpr(task_shader, AC_UD_CS_GRID_SIZE);
- const struct radv_userdata_info *draw_id_loc =
- radv_get_user_sgpr(task_shader, AC_UD_CS_TASK_DRAW_ID);
+ const struct radv_userdata_info *draw_id_loc = radv_get_user_sgpr(task_shader, AC_UD_CS_TASK_DRAW_ID);
if (xyz_loc->sgpr_idx != -1) {
assert(xyz_loc->num_sgprs == 3);
* which requires 0 for out-of-bounds access.
*/
static void
-radv_handle_zero_index_buffer_bug(struct radv_cmd_buffer *cmd_buffer, uint64_t *index_va,
- uint32_t *remaining_indexes)
+radv_handle_zero_index_buffer_bug(struct radv_cmd_buffer *cmd_buffer, uint64_t *index_va, uint32_t *remaining_indexes)
{
const uint32_t zero = 0;
uint32_t offset;
}
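/* The workaround itself (body partly elided above): upload a single zero
 * DWORD and point *index_va at it, so affected GPUs fetch index 0 instead of
 * faulting on a 0-sized index buffer. */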
ALWAYS_INLINE static void
-radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info,
- uint32_t drawCount, const VkMultiDrawIndexedInfoEXT *minfo,
- uint32_t stride,
+radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info,
+ uint32_t drawCount, const VkMultiDrawIndexedInfoEXT *minfo, uint32_t stride,
const int32_t *vertexOffset)
{
const int index_size = radv_get_vgt_index_size(state->index_type);
unsigned i = 0;
const bool uses_drawid = state->uses_drawid;
- const bool can_eop =
- !uses_drawid && cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10;
+ const bool can_eop = !uses_drawid && cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10;
if (uses_drawid) {
if (vertexOffset) {
radv_emit_userdata_vertex(cmd_buffer, info, *vertexOffset);
- vk_foreach_multi_draw_indexed(draw, i, minfo, drawCount, stride) {
+ vk_foreach_multi_draw_indexed (draw, i, minfo, drawCount, stride) {
uint32_t remaining_indexes = MAX2(state->max_index_count, draw->firstIndex) - draw->firstIndex;
uint64_t index_va = state->index_va + draw->firstIndex * index_size;
/* Handle draw calls with 0-sized index buffers if the GPU can't support them. */
- if (!remaining_indexes &&
- cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug)
+ if (!remaining_indexes && cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug)
radv_handle_zero_index_buffer_bug(cmd_buffer, &index_va, &remaining_indexes);
if (i > 0)
if (!state->render.view_mask) {
radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false);
} else {
- u_foreach_bit(view, state->render.view_mask) {
+ u_foreach_bit (view, state->render.view_mask) {
radv_emit_view_index(cmd_buffer, view);
radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false);
}
}
} else {
- vk_foreach_multi_draw_indexed(draw, i, minfo, drawCount, stride) {
+ vk_foreach_multi_draw_indexed (draw, i, minfo, drawCount, stride) {
uint32_t remaining_indexes = MAX2(state->max_index_count, draw->firstIndex) - draw->firstIndex;
uint64_t index_va = state->index_va + draw->firstIndex * index_size;
/* Handle draw calls with 0-sized index buffers if the GPU can't support them. */
- if (!remaining_indexes &&
- cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug)
+ if (!remaining_indexes && cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug)
radv_handle_zero_index_buffer_bug(cmd_buffer, &index_va, &remaining_indexes);
if (i > 0) {
if (!state->render.view_mask) {
radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false);
} else {
- u_foreach_bit(view, state->render.view_mask) {
+ u_foreach_bit (view, state->render.view_mask) {
radv_emit_view_index(cmd_buffer, view);
radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false);
* count == 0 for the last draw that doesn't have NOT_EOP.
*/
while (drawCount > 1) {
- const VkMultiDrawIndexedInfoEXT *last = (const VkMultiDrawIndexedInfoEXT*)(((const uint8_t*)minfo) + (drawCount - 1) * stride);
+ const VkMultiDrawIndexedInfoEXT *last =
+ (const VkMultiDrawIndexedInfoEXT *)(((const uint8_t *)minfo) + (drawCount - 1) * stride);
if (last->indexCount)
break;
drawCount--;
}
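/* The trim matters because the loop below emits its final packet without
 * NOT_EOP; per the comment above, that last draw must not have
 * indexCount == 0, so empty trailing entries are peeled off first. */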
radv_emit_userdata_vertex(cmd_buffer, info, *vertexOffset);
- vk_foreach_multi_draw_indexed(draw, i, minfo, drawCount, stride) {
+ vk_foreach_multi_draw_indexed (draw, i, minfo, drawCount, stride) {
uint32_t remaining_indexes = MAX2(state->max_index_count, draw->firstIndex) - draw->firstIndex;
uint64_t index_va = state->index_va + draw->firstIndex * index_size;
/* Handle draw calls with 0-sized index buffers if the GPU can't support them. */
- if (!remaining_indexes &&
- cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug)
+ if (!remaining_indexes && cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug)
radv_handle_zero_index_buffer_bug(cmd_buffer, &index_va, &remaining_indexes);
if (!state->render.view_mask) {
- radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, can_eop && i < drawCount - 1);
+ radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount,
+ can_eop && i < drawCount - 1);
} else {
- u_foreach_bit(view, state->render.view_mask) {
+ u_foreach_bit (view, state->render.view_mask) {
radv_emit_view_index(cmd_buffer, view);
radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false);
}
}
} else {
- vk_foreach_multi_draw_indexed(draw, i, minfo, drawCount, stride) {
+ vk_foreach_multi_draw_indexed (draw, i, minfo, drawCount, stride) {
uint32_t remaining_indexes = MAX2(state->max_index_count, draw->firstIndex) - draw->firstIndex;
uint64_t index_va = state->index_va + draw->firstIndex * index_size;
/* Handle draw calls with 0-sized index buffers if the GPU can't support them. */
- if (!remaining_indexes &&
- cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug)
+ if (!remaining_indexes && cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug)
radv_handle_zero_index_buffer_bug(cmd_buffer, &index_va, &remaining_indexes);
- const VkMultiDrawIndexedInfoEXT *next = (const VkMultiDrawIndexedInfoEXT*)(i < drawCount - 1 ? ((uint8_t*)draw + stride) : NULL);
+ const VkMultiDrawIndexedInfoEXT *next =
+ (const VkMultiDrawIndexedInfoEXT *)(i < drawCount - 1 ? ((uint8_t *)draw + stride) : NULL);
const bool offset_changes = next && next->vertexOffset != draw->vertexOffset;
radv_emit_userdata_vertex(cmd_buffer, info, draw->vertexOffset);
if (!state->render.view_mask) {
- radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, can_eop && !offset_changes && i < drawCount - 1);
+ radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount,
+ can_eop && !offset_changes && i < drawCount - 1);
} else {
- u_foreach_bit(view, state->render.view_mask) {
+ u_foreach_bit (view, state->render.view_mask) {
radv_emit_view_index(cmd_buffer, view);
radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false);
}
ALWAYS_INLINE static void
-radv_emit_direct_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info,
- uint32_t drawCount, const VkMultiDrawInfoEXT *minfo,
- uint32_t use_opaque, uint32_t stride)
+radv_emit_direct_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, uint32_t drawCount,
+ const VkMultiDrawInfoEXT *minfo, uint32_t use_opaque, uint32_t stride)
{
unsigned i = 0;
const uint32_t view_mask = cmd_buffer->state.render.view_mask;
const bool uses_drawid = cmd_buffer->state.uses_drawid;
uint32_t last_start = 0;
- vk_foreach_multi_draw(draw, i, minfo, drawCount, stride) {
+ vk_foreach_multi_draw (draw, i, minfo, drawCount, stride) {
if (!i)
radv_emit_userdata_vertex(cmd_buffer, info, draw->firstVertex);
else
if (!view_mask) {
radv_cs_emit_draw_packet(cmd_buffer, draw->vertexCount, use_opaque);
} else {
- u_foreach_bit(view, view_mask) {
+ u_foreach_bit (view, view_mask) {
radv_emit_view_index(cmd_buffer, view);
radv_cs_emit_draw_packet(cmd_buffer, draw->vertexCount, use_opaque);
}
last_start = draw->firstVertex;
}
if (drawCount > 1) {
- struct radv_cmd_state *state = &cmd_buffer->state;
- assert(state->last_vertex_offset_valid);
- state->last_vertex_offset = last_start;
- if (uses_drawid)
- state->last_drawid = drawCount - 1;
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ assert(state->last_vertex_offset_valid);
+ state->last_vertex_offset = last_start;
+ if (uses_drawid)
+ state->last_drawid = drawCount - 1;
}
}
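/* The post-loop fixup above keeps last_vertex_offset/last_drawid coherent:
 * it is what lets the `if (!i)` fast path emit the vertex user SGPRs once
 * per multi-draw rather than once per entry. */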
ALWAYS_INLINE static void
-radv_emit_direct_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer,
- uint32_t x, uint32_t y, uint32_t z)
+radv_emit_direct_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z)
{
const uint32_t view_mask = cmd_buffer->state.render.view_mask;
const uint32_t count = x * y * z;
if (!view_mask) {
radv_cs_emit_draw_packet(cmd_buffer, count, 0);
} else {
- u_foreach_bit(view, view_mask) {
+ u_foreach_bit (view, view_mask) {
radv_emit_view_index(cmd_buffer, view);
radv_cs_emit_draw_packet(cmd_buffer, count, 0);
}
}
ALWAYS_INLINE static void
-radv_emit_indirect_mesh_draw_packets(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info)
+radv_emit_indirect_mesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info)
{
const struct radv_cmd_state *state = &cmd_buffer->state;
struct radeon_winsys *ws = cmd_buffer->device->ws;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
- const uint64_t va =
- radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset;
- const uint64_t count_va = !info->count_buffer
- ? 0
- : radv_buffer_get_va(info->count_buffer->bo) +
- info->count_buffer->offset + info->count_buffer_offset;
+ const uint64_t va = radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset;
+ const uint64_t count_va = !info->count_buffer ? 0
+ : radv_buffer_get_va(info->count_buffer->bo) +
+ info->count_buffer->offset + info->count_buffer_offset;
radv_cs_add_buffer(ws, cs, info->indirect->bo);
}
ALWAYS_INLINE static void
-radv_emit_direct_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y,
- uint32_t z)
+radv_emit_direct_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z)
{
const uint32_t view_mask = cmd_buffer->state.render.view_mask;
const unsigned num_views = MAX2(1, util_bitcount(view_mask));
unsigned ace_predication_size = num_views * 6; /* DISPATCH_TASKMESH_DIRECT_ACE size */
radv_emit_userdata_task(cmd_buffer, x, y, z, 0);
- radv_cs_emit_compute_predication(&cmd_buffer->state, cmd_buffer->gang.cs,
- cmd_buffer->mec_inv_pred_va, &cmd_buffer->mec_inv_pred_emitted,
- ace_predication_size);
+ radv_cs_emit_compute_predication(&cmd_buffer->state, cmd_buffer->gang.cs, cmd_buffer->mec_inv_pred_va,
+ &cmd_buffer->mec_inv_pred_emitted, ace_predication_size);
if (!view_mask) {
radv_cs_emit_dispatch_taskmesh_direct_ace_packet(cmd_buffer, x, y, z);
}
static void
-radv_emit_indirect_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info)
+radv_emit_indirect_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info)
{
const uint32_t view_mask = cmd_buffer->state.render.view_mask;
struct radeon_winsys *ws = cmd_buffer->device->ws;
unsigned ace_predication_size = num_views * 11; /* DISPATCH_TASKMESH_INDIRECT_MULTI_ACE size */
struct radeon_cmdbuf *ace_cs = cmd_buffer->gang.cs;
- const uint64_t va =
- radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset;
- const uint64_t count_va = !info->count_buffer
- ? 0
- : radv_buffer_get_va(info->count_buffer->bo) +
- info->count_buffer->offset + info->count_buffer_offset;
+ const uint64_t va = radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset;
+ const uint64_t count_va = !info->count_buffer ? 0
+ : radv_buffer_get_va(info->count_buffer->bo) +
+ info->count_buffer->offset + info->count_buffer_offset;
uint64_t workaround_cond_va = 0;
if (num_views > 1)
workaround_cond_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + workaround_cond_off;
radeon_emit(ace_cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(ace_cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
- COPY_DATA_WR_CONFIRM);
+ radeon_emit(ace_cs,
+ COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM);
radeon_emit(ace_cs, 1);
radeon_emit(ace_cs, 0);
radeon_emit(ace_cs, workaround_cond_va);
}
radv_cs_add_buffer(ws, cmd_buffer->gang.cs, info->indirect->bo);
- radv_cs_emit_compute_predication(&cmd_buffer->state, cmd_buffer->gang.cs,
- cmd_buffer->mec_inv_pred_va, &cmd_buffer->mec_inv_pred_emitted,
- ace_predication_size);
+ radv_cs_emit_compute_predication(&cmd_buffer->state, cmd_buffer->gang.cs, cmd_buffer->mec_inv_pred_va,
+ &cmd_buffer->mec_inv_pred_emitted, ace_predication_size);
if (workaround_cond_va) {
radeon_emit(ace_cs, PKT3(PKT3_COND_EXEC, 3, 0));
radeon_emit(ace_cs, count_va);
radeon_emit(ace_cs, count_va >> 32);
radeon_emit(ace_cs, 0);
- radeon_emit(ace_cs,
- 6 + 11 * num_views); /* 1x COPY_DATA + Nx DISPATCH_TASKMESH_INDIRECT_MULTI_ACE */
+ radeon_emit(ace_cs, 6 + 11 * num_views); /* 1x COPY_DATA + Nx DISPATCH_TASKMESH_INDIRECT_MULTI_ACE */
radeon_emit(ace_cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(ace_cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
- COPY_DATA_WR_CONFIRM);
+ radeon_emit(ace_cs,
+ COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM);
radeon_emit(ace_cs, 0);
radeon_emit(ace_cs, 0);
radeon_emit(ace_cs, workaround_cond_va);
}
if (!view_mask) {
- radv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(cmd_buffer, va, info->count,
- count_va, info->stride);
+ radv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(cmd_buffer, va, info->count, count_va, info->stride);
radv_cs_emit_dispatch_taskmesh_gfx_packet(cmd_buffer);
} else {
u_foreach_bit (view, view_mask) {
radv_emit_view_index(cmd_buffer, view);
- radv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(cmd_buffer, va, info->count,
- count_va, info->stride);
+ radv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(cmd_buffer, va, info->count, count_va, info->stride);
radv_cs_emit_dispatch_taskmesh_gfx_packet(cmd_buffer);
}
}
}
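/* Intent of the workaround_cond_va block above (inferred from the sizes in
 * its comments): the flag is preset to 1, then a single COND_EXEC on
 * count_va guards one COPY_DATA (clearing the flag) plus all per-view ACE
 * dispatches, so a zero indirect draw count skips the whole group at once;
 * the GFX-side consumer of the flag sits outside this hunk. */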
static void
-radv_emit_indirect_draw_packets(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info)
+radv_emit_indirect_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info)
{
const struct radv_cmd_state *state = &cmd_buffer->state;
struct radeon_winsys *ws = cmd_buffer->device->ws;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
- const uint64_t va =
- radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset;
- const uint64_t count_va = info->count_buffer
- ? radv_buffer_get_va(info->count_buffer->bo) +
- info->count_buffer->offset + info->count_buffer_offset
- : 0;
+ const uint64_t va = radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset;
+ const uint64_t count_va = info->count_buffer ? radv_buffer_get_va(info->count_buffer->bo) +
+ info->count_buffer->offset + info->count_buffer_offset
+ : 0;
radv_cs_add_buffer(ws, cs, info->indirect->bo);
}
if (!state->render.view_mask) {
- radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, count_va,
- info->stride);
+ radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, count_va, info->stride);
} else {
- u_foreach_bit(i, state->render.view_mask)
- {
+ u_foreach_bit (i, state->render.view_mask) {
radv_emit_view_index(cmd_buffer, i);
- radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, count_va,
- info->stride);
+ radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, count_va, info->stride);
}
}
}
* any context registers.
*/
static bool
-radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info)
+radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info)
{
if (cmd_buffer->state.context_roll_without_scissor_emitted || info->strmout_buffer)
return true;
- uint64_t used_states =
- cmd_buffer->state.graphics_pipeline->needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL;
+ uint64_t used_states = cmd_buffer->state.graphics_pipeline->needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL;
/* Index, vertex and streamout buffers don't change context regs.
* We assume that any other dirty flag causes context rolls.
*/
- used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_VERTEX_BUFFER |
- RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT | RADV_CMD_DIRTY_STREAMOUT_BUFFER);
+ used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_VERTEX_BUFFER | RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT |
+ RADV_CMD_DIRTY_STREAMOUT_BUFFER);
return cmd_buffer->state.dirty & used_states;
}
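/* This helper is only consulted when rad_info.has_gfx9_scissor_bug is set
 * (see the draw path further down): on affected GFX9 parts a context roll
 * can clobber the last emitted scissor, so it is re-emitted late whenever
 * any context-rolling state is dirty. */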
* because we don't know the primitive topology at compile time, so we should
* disable it dynamically for points or lines.
*/
- const unsigned num_vertices_per_prim =
- si_conv_prim_to_gs_out(d->vk.ia.primitive_topology, true) + 1;
+ const unsigned num_vertices_per_prim = si_conv_prim_to_gs_out(d->vk.ia.primitive_topology, true) + 1;
if (num_vertices_per_prim != 3)
return radv_nggc_none;
unsigned rasterization_samples = radv_get_rasterization_samples(cmd_buffer);
unsigned subpixel_bits = 256;
int32_t small_prim_precision_log2 = util_logbase2(rasterization_samples) - util_logbase2(subpixel_bits);
- nggc_settings |= ((uint32_t) small_prim_precision_log2 << 24u);
+ nggc_settings |= ((uint32_t)small_prim_precision_log2 << 24u);
}
return nggc_settings;
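/* Packing note: bits 24..31 of the returned settings word carry
 * small_prim_precision_log2, i.e. log2(rasterization_samples) - log2(256),
 * the signed exponent of the small-primitive precision used for culling. */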
/* Correction for number of samples per pixel. */
for (unsigned i = 0; i < 2; ++i) {
- vp_scale[i] *= (float) cmd_buffer->state.dynamic.vk.ms.rasterization_samples;
- vp_translate[i] *= (float) cmd_buffer->state.dynamic.vk.ms.rasterization_samples;
+ vp_scale[i] *= (float)cmd_buffer->state.dynamic.vk.ms.rasterization_samples;
+ vp_translate[i] *= (float)cmd_buffer->state.dynamic.vk.ms.rasterization_samples;
}
uint32_t vp_reg_values[4] = {fui(vp_scale[0]), fui(vp_scale[1]), fui(vp_translate[0]), fui(vp_translate[1])};
- const int8_t vp_sgpr_idx =
- radv_get_user_sgpr(last_vgt_shader, AC_UD_NGG_VIEWPORT)->sgpr_idx;
+ const int8_t vp_sgpr_idx = radv_get_user_sgpr(last_vgt_shader, AC_UD_NGG_VIEWPORT)->sgpr_idx;
assert(vp_sgpr_idx != -1);
radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + vp_sgpr_idx * 4, 4);
radeon_emit_array(cmd_buffer->cs, vp_reg_values, 4);
}
- const int8_t nggc_sgpr_idx =
- radv_get_user_sgpr(last_vgt_shader, AC_UD_NGG_CULLING_SETTINGS)->sgpr_idx;
+ const int8_t nggc_sgpr_idx = radv_get_user_sgpr(last_vgt_shader, AC_UD_NGG_CULLING_SETTINGS)->sgpr_idx;
assert(nggc_sgpr_idx != -1);
radeon_set_sh_reg(cmd_buffer->cs, base_reg + nggc_sgpr_idx * 4, nggc_settings);
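/* Both writes above feed the last VGT shader through user SGPRs: four
 * viewport scale/translate values premultiplied by the sample count,
 * followed by the packed culling-settings word at
 * AC_UD_NGG_CULLING_SETTINGS. */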
if (cmd_buffer->state.graphics_pipeline->ps_epilog) {
ps_epilog = cmd_buffer->state.graphics_pipeline->ps_epilog;
} else if ((cmd_buffer->state.emitted_graphics_pipeline != cmd_buffer->state.graphics_pipeline ||
- (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK |
- RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_EQUATION)))) {
+ (cmd_buffer->state.dirty &
+ (RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK | RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE |
+ RADV_CMD_DIRTY_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE | RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_EQUATION)))) {
ps_epilog = lookup_ps_epilog(cmd_buffer);
if (!ps_epilog) {
vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
cmd_buffer->state.col_format_non_compacted = ps_epilog->spi_shader_col_format;
bool need_null_export_workaround = radv_needs_null_export_workaround(
- device, cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT],
- cmd_buffer->state.custom_blend_mode);
+ device, cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT], cmd_buffer->state.custom_blend_mode);
if (need_null_export_workaround && !cmd_buffer->state.col_format_non_compacted)
cmd_buffer->state.col_format_non_compacted = V_028714_SPI_SHADER_32_R;
* 1. radv_need_late_scissor_emission
* 2. any dirty dynamic flags that may cause context rolls
*/
- const bool late_scissor_emission =
- cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug
- ? radv_need_late_scissor_emission(cmd_buffer, info) : false;
+ const bool late_scissor_emission = cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug
+ ? radv_need_late_scissor_emission(cmd_buffer, info)
+ : false;
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_RBPLUS)
radv_emit_rbplus_state(cmd_buffer);
radv_flush_occlusion_query_state(cmd_buffer);
if ((cmd_buffer->state.dirty &
- (RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_DYNAMIC_CULL_MODE |
- RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE | RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_VIEWPORT | RADV_CMD_DIRTY_DYNAMIC_CONSERVATIVE_RAST_MODE |
- RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES | RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY |
- RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS_ENABLE)) &&
+ (RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE |
+ RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT |
+ RADV_CMD_DIRTY_DYNAMIC_CONSERVATIVE_RAST_MODE | RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES |
+ RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS_ENABLE)) &&
cmd_buffer->state.has_nggc)
radv_emit_ngg_culling_state(cmd_buffer);
- if ((cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK |
- RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES |
- RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE)) ||
+ if ((cmd_buffer->state.dirty &
+ (RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK | RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES |
+ RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE)) ||
cmd_buffer->state.emitted_graphics_pipeline != cmd_buffer->state.graphics_pipeline)
radv_emit_binning_state(cmd_buffer);
if (dynamic_states) {
radv_cmd_buffer_flush_dynamic_state(cmd_buffer, dynamic_states);
- if (dynamic_states & (RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES |
- RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE))
+ if (dynamic_states &
+ (RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES | RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE))
radv_emit_fs_state(cmd_buffer);
}
/* Use optimal packet order based on whether we need to sync the
* pipeline.
*/
- if (cmd_buffer->state.flush_bits &
- (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB |
- RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) {
+ if (cmd_buffer->state.flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB |
+ RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) {
/* If we have to wait for idle, set all states first, so that
* all SET packets are processed in parallel with previous draw
* calls. Then upload descriptors, set shader pointers, and
struct radv_cmd_state *state = &cmd_buffer->state;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
assert(state->vtx_base_sgpr);
- if (state->last_num_instances != info->instance_count ||
- cmd_buffer->device->uses_shadow_regs) {
+ if (state->last_num_instances != info->instance_count || cmd_buffer->device->uses_shadow_regs) {
radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, false));
radeon_emit(cs, info->instance_count);
state->last_num_instances = info->instance_count;
}
ALWAYS_INLINE static bool
-radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info,
- uint32_t drawCount)
+radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, uint32_t drawCount)
{
/* For direct draws, this makes sure we don't draw anything.
* For indirect draws, this is necessary to prevent a GPU hang (on MEC version < 100).
assert(!task_shader || ace_cs);
- const VkShaderStageFlags stages = VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_FRAGMENT_BIT | (task_shader ? VK_SHADER_STAGE_TASK_BIT_EXT : 0);
- const bool pipeline_is_dirty =
- cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE &&
- cmd_buffer->state.graphics_pipeline != cmd_buffer->state.emitted_graphics_pipeline;
+ const VkShaderStageFlags stages =
+ VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_FRAGMENT_BIT | (task_shader ? VK_SHADER_STAGE_TASK_BIT_EXT : 0);
+ const bool pipeline_is_dirty = cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE &&
+ cmd_buffer->state.graphics_pipeline != cmd_buffer->state.emitted_graphics_pipeline;
const bool need_task_semaphore = task_shader && radv_flush_gang_leader_semaphore(cmd_buffer);
ASSERTED const unsigned cdw_max =
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4096 + 128 * (drawCount - 1));
- ASSERTED const unsigned ace_cdw_max = !ace_cs ? 0 :
- radeon_check_space(cmd_buffer->device->ws, ace_cs, 4096 + 128 * (drawCount - 1));
+ ASSERTED const unsigned ace_cdw_max =
+ !ace_cs ? 0 : radeon_check_space(cmd_buffer->device->ws, ace_cs, 4096 + 128 * (drawCount - 1));
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)
radv_emit_fb_mip_change_flush(cmd_buffer);
radv_flush_descriptors(cmd_buffer, stages, VK_PIPELINE_BIND_POINT_GRAPHICS);
- const VkShaderStageFlags pc_stages =
- radv_must_flush_constants(cmd_buffer, stages, VK_PIPELINE_BIND_POINT_GRAPHICS);
+ const VkShaderStageFlags pc_stages = radv_must_flush_constants(cmd_buffer, stages, VK_PIPELINE_BIND_POINT_GRAPHICS);
if (pc_stages)
radv_flush_constants(cmd_buffer, pc_stages, VK_PIPELINE_BIND_POINT_GRAPHICS);
* It must be done after drawing.
*/
if (radv_is_streamout_enabled(cmd_buffer) &&
- (rad_info->family == CHIP_HAWAII || rad_info->family == CHIP_TONGA ||
- rad_info->family == CHIP_FIJI)) {
+ (rad_info->family == CHIP_HAWAII || rad_info->family == CHIP_TONGA || rad_info->family == CHIP_FIJI)) {
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_STREAMOUT_SYNC;
}
}
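/* Note: the flag is only accumulated into flush_bits here; the actual
 * VGT_STREAMOUT_SYNC goes out with the next cache flush, which is late
 * enough to satisfy the "must be done after drawing" requirement quoted
 * above. */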
VKAPI_ATTR void VKAPI_CALL
-radv_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount,
- uint32_t firstVertex, uint32_t firstInstance)
+radv_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex,
+ uint32_t firstInstance)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_draw_info info;
if (!radv_before_draw(cmd_buffer, &info, 1))
return;
- const VkMultiDrawInfoEXT minfo = { firstVertex, vertexCount };
+ const VkMultiDrawInfoEXT minfo = {firstVertex, vertexCount};
radv_emit_direct_draw_packets(cmd_buffer, &info, 1, &minfo, 0, 0);
radv_after_draw(cmd_buffer);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdDrawMultiEXT(VkCommandBuffer commandBuffer, uint32_t drawCount, const VkMultiDrawInfoEXT *pVertexInfo,
- uint32_t instanceCount, uint32_t firstInstance, uint32_t stride)
+ uint32_t instanceCount, uint32_t firstInstance, uint32_t stride)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_draw_info info;
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount,
- uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance)
+radv_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex,
+ int32_t vertexOffset, uint32_t firstInstance)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_draw_info info;
if (!radv_before_draw(cmd_buffer, &info, 1))
return;
- const VkMultiDrawIndexedInfoEXT minfo = { firstIndex, indexCount, vertexOffset };
+ const VkMultiDrawIndexedInfoEXT minfo = {firstIndex, indexCount, vertexOffset};
radv_emit_draw_packets_indexed(cmd_buffer, &info, 1, &minfo, 0, NULL);
radv_after_draw(cmd_buffer);
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdDrawMultiIndexedEXT(VkCommandBuffer commandBuffer, uint32_t drawCount, const VkMultiDrawIndexedInfoEXT *pIndexInfo,
- uint32_t instanceCount, uint32_t firstInstance, uint32_t stride, const int32_t *pVertexOffset)
+radv_CmdDrawMultiIndexedEXT(VkCommandBuffer commandBuffer, uint32_t drawCount,
+ const VkMultiDrawIndexedInfoEXT *pIndexInfo, uint32_t instanceCount, uint32_t firstInstance,
+ uint32_t stride, const int32_t *pVertexOffset)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_draw_info info;
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset,
- uint32_t drawCount, uint32_t stride)
+radv_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, uint32_t drawCount,
+ uint32_t stride)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset,
- uint32_t drawCount, uint32_t stride)
+radv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, uint32_t drawCount,
+ uint32_t stride)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset,
- VkBuffer _countBuffer, VkDeviceSize countBufferOffset,
- uint32_t maxDrawCount, uint32_t stride)
+radv_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, VkBuffer _countBuffer,
+ VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer,
- VkDeviceSize offset, VkBuffer _countBuffer,
- VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
+radv_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset,
+ VkBuffer _countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
uint32_t stride)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer,
- VkDeviceSize offset, uint32_t drawCount, uint32_t stride)
+radv_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset,
+ uint32_t drawCount, uint32_t stride)
{
if (!drawCount)
return;
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer,
- VkDeviceSize offset, VkBuffer _countBuffer,
- VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
+radv_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset,
+ VkBuffer _countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
uint32_t stride)
{
const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- VK_FROM_HANDLE(radv_indirect_command_layout, layout,
- pGeneratedCommandsInfo->indirectCommandsLayout);
+ VK_FROM_HANDLE(radv_indirect_command_layout, layout, pGeneratedCommandsInfo->indirectCommandsLayout);
VK_FROM_HANDLE(radv_buffer, prep_buffer, pGeneratedCommandsInfo->preprocessBuffer);
/* The only actions that can be done are draws, so skip on other queues. */
return;
uint32_t cmdbuf_size = radv_get_indirect_cmdbuf_size(pGeneratedCommandsInfo);
- uint64_t va = radv_buffer_get_va(prep_buffer->bo) + prep_buffer->offset +
- pGeneratedCommandsInfo->preprocessOffset;
+ uint64_t va = radv_buffer_get_va(prep_buffer->bo) + prep_buffer->offset + pGeneratedCommandsInfo->preprocessOffset;
const uint32_t view_mask = cmd_buffer->state.render.view_mask;
radeon_emit(cmd_buffer->cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
}
static void
-radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_shader *compute_shader,
+radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *compute_shader,
const struct radv_dispatch_info *info)
{
unsigned dispatch_initiator = cmd_buffer->device->dispatch_initiator;
if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va,
- &cmd_buffer->mec_inv_pred_emitted,
- 4 /* DISPATCH_INDIRECT size */);
+ &cmd_buffer->mec_inv_pred_emitted, 4 /* DISPATCH_INDIRECT size */);
radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) | PKT3_SHADER_TYPE_S(1));
radeon_emit(cs, info->va);
radeon_emit(cs, info->va >> 32);
}
radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
- radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[0]) |
- S_00B81C_NUM_THREAD_PARTIAL(remainder[0]));
- radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[1]) |
- S_00B81C_NUM_THREAD_PARTIAL(remainder[1]));
- radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[2]) |
- S_00B81C_NUM_THREAD_PARTIAL(remainder[2]));
+ radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[0]) | S_00B81C_NUM_THREAD_PARTIAL(remainder[0]));
+ radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[1]) | S_00B81C_NUM_THREAD_PARTIAL(remainder[1]));
+ radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[2]) | S_00B81C_NUM_THREAD_PARTIAL(remainder[2]));
dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1);
}
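   /* Editor's sketch (illustration, not RADV code): the PARTIAL_TG path above
    * programs each axis with the full workgroup size plus the thread count of
    * one trailing partial group, roughly:
    *
    *    full    = cs_block_size[axis];
    *    partial = total_threads[axis] % cs_block_size[axis];
    *
    * e.g. a 100-thread axis with 64-thread groups dispatches one full group
    * of 64 and one partial group of 36. */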
if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va,
- &cmd_buffer->mec_inv_pred_emitted,
- 5 /* DISPATCH_DIRECT size */);
+ &cmd_buffer->mec_inv_pred_emitted, 5 /* DISPATCH_DIRECT size */);
predicating = false;
}
}
static void
-radv_upload_compute_shader_descriptors(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint bind_point)
+radv_upload_compute_shader_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
{
radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT, bind_point);
- const VkShaderStageFlags stages = bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR
- ? RADV_RT_STAGE_BITS
- : VK_SHADER_STAGE_COMPUTE_BIT;
+ const VkShaderStageFlags stages =
+ bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR ? RADV_RT_STAGE_BITS : VK_SHADER_STAGE_COMPUTE_BIT;
const VkShaderStageFlags pc_stages = radv_must_flush_constants(cmd_buffer, stages, bind_point);
if (pc_stages)
radv_flush_constants(cmd_buffer, pc_stages, bind_point);
bool pipeline_is_dirty = pipeline != cmd_buffer->state.emitted_compute_pipeline;
if (compute_shader->info.cs.regalloc_hang_bug)
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
- if (cmd_buffer->state.flush_bits &
- (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB |
- RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) {
+ if (cmd_buffer->state.flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB |
+ RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) {
/* If we have to wait for idle, set all states first, so that
* all SET packets are processed in parallel with previous draw
* calls. Then upload descriptors, set shader pointers, and
void
radv_compute_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info)
{
- radv_dispatch(cmd_buffer, info, cmd_buffer->state.compute_pipeline,
- cmd_buffer->state.shaders[MESA_SHADER_COMPUTE],
+ radv_dispatch(cmd_buffer, info, cmd_buffer->state.compute_pipeline, cmd_buffer->state.shaders[MESA_SHADER_COMPUTE],
VK_PIPELINE_BIND_POINT_COMPUTE);
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdDispatchBase(VkCommandBuffer commandBuffer, uint32_t base_x, uint32_t base_y,
- uint32_t base_z, uint32_t x, uint32_t y, uint32_t z)
+radv_CmdDispatchBase(VkCommandBuffer commandBuffer, uint32_t base_x, uint32_t base_y, uint32_t base_z, uint32_t x,
+ uint32_t y, uint32_t z)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_dispatch_info info = {0};
};
static void
-radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, const VkTraceRaysIndirectCommand2KHR *tables,
- uint64_t indirect_va, enum radv_rt_mode mode)
+radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, const VkTraceRaysIndirectCommand2KHR *tables, uint64_t indirect_va,
+ enum radv_rt_mode mode)
{
if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_RT)
return;
uint32_t wave_size = rt_prolog->info.wave_size;
/* The hardware register is specified as a multiple of 64 or 256 DWORDS. */
- unsigned scratch_alloc_granule =
- cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11 ? 256 : 1024;
- scratch_bytes_per_wave +=
- align(cmd_buffer->state.rt_stack_size * wave_size, scratch_alloc_granule);
+ unsigned scratch_alloc_granule = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11 ? 256 : 1024;
+ scratch_bytes_per_wave += align(cmd_buffer->state.rt_stack_size * wave_size, scratch_alloc_granule);
cmd_buffer->compute_scratch_size_per_wave_needed =
MAX2(cmd_buffer->compute_scratch_size_per_wave_needed, scratch_bytes_per_wave);
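   /* Worked example (editor's note, assuming GFX11): with wave_size = 64,
    * rt_stack_size = 1024 bytes per lane and the GFX11 granule of 256 bytes,
    * this adds align(1024 * 64, 256) = 65536 bytes of scratch per wave. */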
uint64_t sbt_va;
if (mode != radv_rt_mode_indirect2) {
- uint32_t upload_size = mode == radv_rt_mode_direct
- ? sizeof(VkTraceRaysIndirectCommand2KHR)
- : offsetof(VkTraceRaysIndirectCommand2KHR, width);
+ uint32_t upload_size = mode == radv_rt_mode_direct ? sizeof(VkTraceRaysIndirectCommand2KHR)
+ : offsetof(VkTraceRaysIndirectCommand2KHR, width);
uint32_t offset;
if (!radv_cmd_buffer_upload_data(cmd_buffer, upload_size, tables, &offset))
uint64_t upload_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + offset;
- launch_size_va = (mode == radv_rt_mode_direct)
- ? upload_va + offsetof(VkTraceRaysIndirectCommand2KHR, width)
- : indirect_va;
+ launch_size_va =
+ (mode == radv_rt_mode_direct) ? upload_va + offsetof(VkTraceRaysIndirectCommand2KHR, width) : indirect_va;
sbt_va = upload_va;
} else {
launch_size_va = indirect_va + offsetof(VkTraceRaysIndirectCommand2KHR, width);
ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 15);
- const struct radv_userdata_info *desc_loc =
- radv_get_user_sgpr(rt_prolog, AC_UD_CS_SBT_DESCRIPTORS);
+ const struct radv_userdata_info *desc_loc = radv_get_user_sgpr(rt_prolog, AC_UD_CS_SBT_DESCRIPTORS);
if (desc_loc->sgpr_idx != -1) {
- radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
- base_reg + desc_loc->sgpr_idx * 4, sbt_va, true);
+ radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + desc_loc->sgpr_idx * 4, sbt_va, true);
}
- const struct radv_userdata_info *size_loc =
- radv_get_user_sgpr(rt_prolog, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR);
+ const struct radv_userdata_info *size_loc = radv_get_user_sgpr(rt_prolog, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR);
if (size_loc->sgpr_idx != -1) {
- radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
- base_reg + size_loc->sgpr_idx * 4, launch_size_va, true);
+ radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + size_loc->sgpr_idx * 4, launch_size_va,
+ true);
}
- const struct radv_userdata_info *base_loc =
- radv_get_user_sgpr(rt_prolog, AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE);
+ const struct radv_userdata_info *base_loc = radv_get_user_sgpr(rt_prolog, AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE);
if (base_loc->sgpr_idx != -1) {
const struct radv_shader_info *cs_info = &rt_prolog->info;
radeon_set_sh_reg(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + base_loc->sgpr_idx * 4,
pipeline->base.scratch_bytes_per_wave / cs_info->wave_size);
}
- const struct radv_userdata_info *shader_loc =
- radv_get_user_sgpr(rt_prolog, AC_UD_CS_TRAVERSAL_SHADER_ADDR);
+ const struct radv_userdata_info *shader_loc = radv_get_user_sgpr(rt_prolog, AC_UD_CS_TRAVERSAL_SHADER_ADDR);
if (shader_loc->sgpr_idx != -1) {
- uint64_t traversal_va =
- cmd_buffer->state.shaders[MESA_SHADER_INTERSECTION]->va | radv_rt_priority_traversal;
- radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
- base_reg + shader_loc->sgpr_idx * 4, traversal_va, true);
+ uint64_t traversal_va = cmd_buffer->state.shaders[MESA_SHADER_INTERSECTION]->va | radv_rt_priority_traversal;
+ radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + shader_loc->sgpr_idx * 4, traversal_va,
+ true);
}
assert(cmd_buffer->cs->cdw <= cdw_max);
}
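/* Editor's sketch (hypothetical helper, not part of RADV): the repeated
 * pattern above — look up where the compiler placed a user SGPR and emit a
 * 64-bit pointer only when the shader actually uses it — could be factored
 * like this, using only the calls already visible in this file. */
static void
emit_rt_user_sgpr(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *rt_prolog, int ud_idx,
                  uint32_t base_reg, uint64_t va)
{
   const struct radv_userdata_info *loc = radv_get_user_sgpr(rt_prolog, ud_idx);
   if (loc->sgpr_idx != -1)
      radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va, true);
}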
VKAPI_ATTR void VKAPI_CALL
-radv_CmdTraceRaysKHR(VkCommandBuffer commandBuffer,
- const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
+radv_CmdTraceRaysKHR(VkCommandBuffer commandBuffer, const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
- const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable,
- uint32_t width, uint32_t height, uint32_t depth)
+ const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable, uint32_t width,
+ uint32_t height, uint32_t depth)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 /* When transitioning from the LAYOUT_UNDEFINED layout, not everyone is
  * consistent in considering previous rendering work for WAW hazards. */
- state->flush_bits |=
- radv_src_access_flush(cmd_buffer, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, image);
+ state->flush_bits |= radv_src_access_flush(cmd_buffer, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, image);
if (image->planes[0].surface.has_stencil &&
!(range->aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
static void
radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
- VkImageLayout src_layout, VkImageLayout dst_layout,
- unsigned src_queue_mask, unsigned dst_queue_mask,
- const VkImageSubresourceRange *range,
+ VkImageLayout src_layout, VkImageLayout dst_layout, unsigned src_queue_mask,
+ unsigned dst_queue_mask, const VkImageSubresourceRange *range,
struct radv_sample_locations_state *sample_locs)
{
struct radv_device *device = cmd_buffer->device;
radv_initialize_htile(cmd_buffer, image, range);
} else if (radv_layout_is_htile_compressed(device, image, src_layout, src_queue_mask) &&
!radv_layout_is_htile_compressed(device, image, dst_layout, dst_queue_mask)) {
- cmd_buffer->state.flush_bits |=
- RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
radv_expand_depth_stencil(cmd_buffer, image, range, sample_locs);
- cmd_buffer->state.flush_bits |=
- RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
}
}
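/* Editor's note: the DB flush above brackets radv_expand_depth_stencil() on
 * both sides — presumably once so the expand reads up-to-date HTILE, and
 * once so later consumers see the decompressed result. */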
static uint32_t
-radv_init_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
- const VkImageSubresourceRange *range, uint32_t value)
+radv_init_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
+ uint32_t value)
{
struct radv_barrier_data barrier = {0};
}
uint32_t
-radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
- const VkImageSubresourceRange *range)
+radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range)
{
static const uint32_t fmask_clear_values[4] = {0x00000000, 0x02020202, 0xE4E4E4E4, 0x76543210};
uint32_t log2_samples = util_logbase2(image->vk.samples);
}
uint32_t
-radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
- const VkImageSubresourceRange *range, uint32_t value)
+radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
+ uint32_t value)
{
struct radv_barrier_data barrier = {0};
uint32_t flush_bits = 0;
/* Compute the size of all fast clearable DCC levels. */
for (unsigned i = 0; i < image->planes[0].surface.num_meta_levels; i++) {
struct legacy_surf_dcc_level *dcc_level = &image->planes[0].surface.u.legacy.color.dcc_level[i];
- unsigned dcc_fast_clear_size =
- dcc_level->dcc_slice_fast_clear_size * image->vk.array_layers;
+ unsigned dcc_fast_clear_size = dcc_level->dcc_slice_fast_clear_size * image->vk.array_layers;
if (!dcc_fast_clear_size)
break;
/* Initialize the mipmap levels without DCC. */
if (size != image->planes[0].surface.meta_size) {
flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
- radv_buffer_get_va(image->bindings[0].bo) +
- image->bindings[0].offset +
+ radv_buffer_get_va(image->bindings[0].bo) + image->bindings[0].offset +
image->planes[0].surface.meta_offset + size,
image->planes[0].surface.meta_size - size, 0xffffffff);
}
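   /* Example (editor's note): if only the first N mip levels are fast
    * clearable, `size` stops at their DCC slices and the remaining
    * meta_size - size bytes are filled with 0xffffffff, i.e. the fully
    * expanded encoding used elsewhere in this file. */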
* Initialize DCC/FMASK/CMASK metadata for a color image.
*/
static void
-radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
- VkImageLayout src_layout, VkImageLayout dst_layout,
- unsigned src_queue_mask, unsigned dst_queue_mask,
+radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout src_layout,
+ VkImageLayout dst_layout, unsigned src_queue_mask, unsigned dst_queue_mask,
const VkImageSubresourceRange *range)
{
uint32_t flush_bits = 0;
 /* When transitioning from the LAYOUT_UNDEFINED layout, not everyone is
  * consistent in considering previous rendering work for WAW hazards.
  */
- cmd_buffer->state.flush_bits |=
- radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image);
+ cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image);
if (radv_image_has_cmask(image)) {
uint32_t value;
/* TODO: Fix clearing CMASK layers on GFX9. */
if (radv_image_is_tc_compat_cmask(image) ||
(radv_image_has_fmask(image) &&
- radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, dst_layout,
- dst_queue_mask))) {
+ radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, dst_layout, dst_queue_mask))) {
value = 0xccccccccu;
} else {
value = 0xffffffffu;
if (radv_dcc_enabled(image, range->baseMipLevel)) {
uint32_t value = 0xffffffffu; /* Fully expanded mode. */
- if (radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel,
- dst_layout, dst_queue_mask)) {
+ if (radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel, dst_layout, dst_queue_mask)) {
value = 0u;
}
}
static void
-radv_retile_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
- VkImageLayout src_layout, VkImageLayout dst_layout, unsigned dst_queue_mask)
+radv_retile_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout src_layout,
+ VkImageLayout dst_layout, unsigned dst_queue_mask)
{
/* If the image is read-only, we don't have to retile DCC because it can't change. */
if (!(image->vk.usage & RADV_IMAGE_USAGE_WRITE_BITS))
return;
if (src_layout != VK_IMAGE_LAYOUT_PRESENT_SRC_KHR &&
- (dst_layout == VK_IMAGE_LAYOUT_PRESENT_SRC_KHR ||
- (dst_queue_mask & (1u << RADV_QUEUE_FOREIGN))))
+ (dst_layout == VK_IMAGE_LAYOUT_PRESENT_SRC_KHR || (dst_queue_mask & (1u << RADV_QUEUE_FOREIGN))))
radv_retile_dcc(cmd_buffer, image);
}
*/
static void
radv_handle_color_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
- VkImageLayout src_layout, VkImageLayout dst_layout,
- unsigned src_queue_mask, unsigned dst_queue_mask,
- const VkImageSubresourceRange *range)
+ VkImageLayout src_layout, VkImageLayout dst_layout, unsigned src_queue_mask,
+ unsigned dst_queue_mask, const VkImageSubresourceRange *range)
{
bool dcc_decompressed = false, fast_clear_flushed = false;
- if (!radv_image_has_cmask(image) && !radv_image_has_fmask(image) &&
- !radv_dcc_enabled(image, range->baseMipLevel))
+ if (!radv_image_has_cmask(image) && !radv_image_has_fmask(image) && !radv_dcc_enabled(image, range->baseMipLevel))
return;
if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
- radv_init_color_image_metadata(cmd_buffer, image, src_layout, dst_layout,
- src_queue_mask, dst_queue_mask, range);
+ radv_init_color_image_metadata(cmd_buffer, image, src_layout, dst_layout, src_queue_mask, dst_queue_mask, range);
if (radv_image_need_retile(image))
radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask);
if (radv_dcc_enabled(image, range->baseMipLevel)) {
if (src_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) {
cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, image, range, 0xffffffffu);
- } else if (radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel,
- src_layout, src_queue_mask) &&
- !radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel,
- dst_layout, dst_queue_mask)) {
+ } else if (radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel, src_layout,
+ src_queue_mask) &&
+ !radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel, dst_layout,
+ dst_queue_mask)) {
radv_decompress_dcc(cmd_buffer, image, range);
dcc_decompressed = true;
- } else if (radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel,
- src_layout, src_queue_mask) &&
- !radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel,
- dst_layout, dst_queue_mask)) {
+ } else if (radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, src_layout,
+ src_queue_mask) &&
+ !radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, dst_layout,
+ dst_queue_mask)) {
radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
fast_clear_flushed = true;
}
if (radv_image_need_retile(image))
radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask);
} else if (radv_image_has_cmask(image) || radv_image_has_fmask(image)) {
- if (radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel,
- src_layout, src_queue_mask) &&
- !radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel,
- dst_layout, dst_queue_mask)) {
+ if (radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, src_layout, src_queue_mask) &&
+ !radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, dst_layout, dst_queue_mask)) {
radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
fast_clear_flushed = true;
}
}
/* MSAA color decompress. */
- const enum radv_fmask_compression src_fmask_comp = radv_layout_fmask_compression(cmd_buffer->device,
- image, src_layout, src_queue_mask);
- const enum radv_fmask_compression dst_fmask_comp = radv_layout_fmask_compression(cmd_buffer->device,
- image, dst_layout, dst_queue_mask);
+ const enum radv_fmask_compression src_fmask_comp =
+ radv_layout_fmask_compression(cmd_buffer->device, image, src_layout, src_queue_mask);
+ const enum radv_fmask_compression dst_fmask_comp =
+ radv_layout_fmask_compression(cmd_buffer->device, image, dst_layout, dst_queue_mask);
if (src_fmask_comp <= dst_fmask_comp)
return;
if (src_fmask_comp == RADV_FMASK_COMPRESSION_FULL) {
- if (radv_dcc_enabled(image, range->baseMipLevel) &&
- !radv_image_use_dcc_image_stores(cmd_buffer->device, image) && !dcc_decompressed) {
+ if (radv_dcc_enabled(image, range->baseMipLevel) && !radv_image_use_dcc_image_stores(cmd_buffer->device, image) &&
+ !dcc_decompressed) {
/* A DCC decompress is required before expanding FMASK
* when DCC stores aren't supported to avoid being in
* a state where DCC is compressed and the main
}
static void
-radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
- VkImageLayout src_layout, VkImageLayout dst_layout,
- uint32_t src_family_index, uint32_t dst_family_index,
- const VkImageSubresourceRange *range,
- struct radv_sample_locations_state *sample_locs)
+radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout src_layout,
+ VkImageLayout dst_layout, uint32_t src_family_index, uint32_t dst_family_index,
+ const VkImageSubresourceRange *range, struct radv_sample_locations_state *sample_locs)
{
enum radv_queue_family src_qf = vk_queue_to_radv(cmd_buffer->device->physical_device, src_family_index);
enum radv_queue_family dst_qf = vk_queue_to_radv(cmd_buffer->device->physical_device, dst_family_index);
* a corresponding release/acquire. Do the transition in the
* most flexible queue. */
- assert(src_qf == cmd_buffer->qf ||
- dst_qf == cmd_buffer->qf);
+ assert(src_qf == cmd_buffer->qf || dst_qf == cmd_buffer->qf);
if (src_family_index == VK_QUEUE_FAMILY_EXTERNAL || src_family_index == VK_QUEUE_FAMILY_FOREIGN_EXT)
return;
if (cmd_buffer->qf == RADV_QUEUE_TRANSFER)
return;
- if (cmd_buffer->qf == RADV_QUEUE_COMPUTE &&
- (src_qf == RADV_QUEUE_GENERAL || dst_qf == RADV_QUEUE_GENERAL))
+ if (cmd_buffer->qf == RADV_QUEUE_COMPUTE && (src_qf == RADV_QUEUE_GENERAL || dst_qf == RADV_QUEUE_GENERAL))
return;
}
- unsigned src_queue_mask =
- radv_image_queue_family_mask(image, src_qf, cmd_buffer->qf);
- unsigned dst_queue_mask =
- radv_image_queue_family_mask(image, dst_qf, cmd_buffer->qf);
+ unsigned src_queue_mask = radv_image_queue_family_mask(image, src_qf, cmd_buffer->qf);
+ unsigned dst_queue_mask = radv_image_queue_family_mask(image, dst_qf, cmd_buffer->qf);
if (src_layout == dst_layout && src_queue_mask == dst_queue_mask)
return;
if (image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
- radv_handle_depth_image_transition(cmd_buffer, image, src_layout, dst_layout,
- src_queue_mask, dst_queue_mask, range, sample_locs);
+ radv_handle_depth_image_transition(cmd_buffer, image, src_layout, dst_layout, src_queue_mask, dst_queue_mask,
+ range, sample_locs);
} else {
- radv_handle_color_image_transition(cmd_buffer, image, src_layout, dst_layout,
- src_queue_mask, dst_queue_mask, range);
+ radv_handle_color_image_transition(cmd_buffer, image, src_layout, dst_layout, src_queue_mask, dst_queue_mask,
+ range);
}
}
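/* Editor's note on the flow above: the transition is skipped entirely when
 * neither the layout nor the computed queue mask changes; images with a
 * depth aspect take the HTILE path, all others the CMASK/FMASK/DCC color
 * path. */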
* operation but it might also use a CP DMA copy in some rare situations. Other operations using
* a CP DMA clear are implicitly synchronized (see CP_DMA_SYNC).
*/
- if (stage_mask & (VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_CLEAR_BIT |
- VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT |
- VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT))
+ if (stage_mask &
+ (VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_CLEAR_BIT | VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT |
+ VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT))
si_cp_dma_wait_for_idle(cmd_buffer);
}
static void
-radv_barrier(struct radv_cmd_buffer *cmd_buffer, const VkDependencyInfo *dep_info,
- enum rgp_barrier_reason reason)
+radv_barrier(struct radv_cmd_buffer *cmd_buffer, const VkDependencyInfo *dep_info, enum rgp_barrier_reason reason)
{
enum radv_cmd_flush_bits src_flush_bits = 0;
enum radv_cmd_flush_bits dst_flush_bits = 0;
for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) {
src_stage_mask |= dep_info->pMemoryBarriers[i].srcStageMask;
- src_flush_bits |=
- radv_src_access_flush(cmd_buffer, dep_info->pMemoryBarriers[i].srcAccessMask, NULL);
+ src_flush_bits |= radv_src_access_flush(cmd_buffer, dep_info->pMemoryBarriers[i].srcAccessMask, NULL);
dst_stage_mask |= dep_info->pMemoryBarriers[i].dstStageMask;
- dst_flush_bits |=
- radv_dst_access_flush(cmd_buffer, dep_info->pMemoryBarriers[i].dstAccessMask, NULL);
+ dst_flush_bits |= radv_dst_access_flush(cmd_buffer, dep_info->pMemoryBarriers[i].dstAccessMask, NULL);
}
for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) {
src_stage_mask |= dep_info->pBufferMemoryBarriers[i].srcStageMask;
- src_flush_bits |=
- radv_src_access_flush(cmd_buffer, dep_info->pBufferMemoryBarriers[i].srcAccessMask, NULL);
+ src_flush_bits |= radv_src_access_flush(cmd_buffer, dep_info->pBufferMemoryBarriers[i].srcAccessMask, NULL);
dst_stage_mask |= dep_info->pBufferMemoryBarriers[i].dstStageMask;
- dst_flush_bits |=
- radv_dst_access_flush(cmd_buffer, dep_info->pBufferMemoryBarriers[i].dstAccessMask, NULL);
+ dst_flush_bits |= radv_dst_access_flush(cmd_buffer, dep_info->pBufferMemoryBarriers[i].dstAccessMask, NULL);
}
for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) {
RADV_FROM_HANDLE(radv_image, image, dep_info->pImageMemoryBarriers[i].image);
src_stage_mask |= dep_info->pImageMemoryBarriers[i].srcStageMask;
- src_flush_bits |=
- radv_src_access_flush(cmd_buffer, dep_info->pImageMemoryBarriers[i].srcAccessMask, image);
+ src_flush_bits |= radv_src_access_flush(cmd_buffer, dep_info->pImageMemoryBarriers[i].srcAccessMask, image);
dst_stage_mask |= dep_info->pImageMemoryBarriers[i].dstStageMask;
- dst_flush_bits |=
- radv_dst_access_flush(cmd_buffer, dep_info->pImageMemoryBarriers[i].dstAccessMask, image);
+ dst_flush_bits |= radv_dst_access_flush(cmd_buffer, dep_info->pImageMemoryBarriers[i].dstAccessMask, image);
}
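 /* Editor's note: all three barrier arrays fold into the same four
  * accumulators (src/dst stage masks, src/dst flush bits), so a single
  * cache flush and wait can cover the whole VkDependencyInfo. */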
/* The Vulkan spec 1.1.98 says:
}
radv_handle_image_transition(
- cmd_buffer, image, dep_info->pImageMemoryBarriers[i].oldLayout,
- dep_info->pImageMemoryBarriers[i].newLayout,
- dep_info->pImageMemoryBarriers[i].srcQueueFamilyIndex,
- dep_info->pImageMemoryBarriers[i].dstQueueFamilyIndex,
+ cmd_buffer, image, dep_info->pImageMemoryBarriers[i].oldLayout, dep_info->pImageMemoryBarriers[i].newLayout,
+ dep_info->pImageMemoryBarriers[i].srcQueueFamilyIndex, dep_info->pImageMemoryBarriers[i].dstQueueFamilyIndex,
&dep_info->pImageMemoryBarriers[i].subresourceRange, sample_locs_info ? &sample_locations : NULL);
}
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
- const VkDependencyInfo *pDependencyInfo)
+radv_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, const VkDependencyInfo *pDependencyInfo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
}
static void
-write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event,
- VkPipelineStageFlags2 stageMask, unsigned value)
+write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, VkPipelineStageFlags2 stageMask,
+ unsigned value)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
uint64_t va = radv_buffer_get_va(event->bo);
ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28);
- if (stageMask & (VK_PIPELINE_STAGE_2_COPY_BIT |
- VK_PIPELINE_STAGE_2_RESOLVE_BIT |
- VK_PIPELINE_STAGE_2_BLIT_BIT |
+ if (stageMask & (VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_RESOLVE_BIT | VK_PIPELINE_STAGE_2_BLIT_BIT |
VK_PIPELINE_STAGE_2_CLEAR_BIT)) {
/* Be conservative for now. */
stageMask |= VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT;
/* Flags that only require signaling post PS. */
VkPipelineStageFlags2 post_ps_flags =
post_index_fetch_flags | VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT |
- VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT |
- VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT |
- VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT |
- VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT |
- VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT |
- VK_PIPELINE_STAGE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR |
- VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT;
+ VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT | VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT |
+ VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT |
+ VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT | VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT |
+ VK_PIPELINE_STAGE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR | VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT |
+ VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT;
/* Flags that only require signaling post CS. */
VkPipelineStageFlags2 post_cs_flags = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT;
}
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
- radv_cmd_buffer_uses_mec(cmd_buffer), event_type, 0,
- EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, value,
- cmd_buffer->gfx9_eop_bug_va);
+ radv_cmd_buffer_uses_mec(cmd_buffer), event_type, 0, EOP_DST_SEL_MEM,
+ EOP_DATA_SEL_VALUE_32BIT, va, value, cmd_buffer->gfx9_eop_bug_va);
}
assert(cmd_buffer->cs->cdw <= cdw_max);
}
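/* Editor's sketch (hypothetical helper, a simplification of the logic
 * above): the mask classification selects the cheapest event that still
 * covers every requested stage — compute-only masks get CS_DONE, masks
 * fully covered after the PS get PS_DONE, and anything else falls back to
 * a bottom-of-pipe timestamp. */
static unsigned
pick_event_type(VkPipelineStageFlags2 stage_mask, VkPipelineStageFlags2 post_ps_flags,
                VkPipelineStageFlags2 post_cs_flags)
{
   if (!(stage_mask & ~post_cs_flags))
      return V_028A90_CS_DONE;
   if (!(stage_mask & ~(post_ps_flags | post_cs_flags)))
      return V_028A90_PS_DONE;
   return V_028A90_BOTTOM_OF_PIPE_TS;
}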
VKAPI_ATTR void VKAPI_CALL
-radv_CmdSetEvent2(VkCommandBuffer commandBuffer, VkEvent _event,
- const VkDependencyInfo* pDependencyInfo)
+radv_CmdSetEvent2(VkCommandBuffer commandBuffer, VkEvent _event, const VkDependencyInfo *pDependencyInfo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_event, event, _event);
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdResetEvent2(VkCommandBuffer commandBuffer, VkEvent _event,
- VkPipelineStageFlags2 stageMask)
+radv_CmdResetEvent2(VkCommandBuffer commandBuffer, VkEvent _event, VkPipelineStageFlags2 stageMask)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_event, event, _event);
VKAPI_ATTR void VKAPI_CALL
radv_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents,
- const VkDependencyInfo* pDependencyInfos)
+ const VkDependencyInfo *pDependencyInfos)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
/* VK_EXT_conditional_rendering */
VKAPI_ATTR void VKAPI_CALL
-radv_CmdBeginConditionalRenderingEXT(
- VkCommandBuffer commandBuffer,
- const VkConditionalRenderingBeginInfoEXT *pConditionalRenderingBegin)
+radv_CmdBeginConditionalRenderingEXT(VkCommandBuffer commandBuffer,
+ const VkConditionalRenderingBeginInfoEXT *pConditionalRenderingBegin)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_buffer, buffer, pConditionalRenderingBegin->buffer);
si_emit_cache_flush(cmd_buffer);
- if (cmd_buffer->qf == RADV_QUEUE_GENERAL &&
- !cmd_buffer->device->physical_device->rad_info.has_32bit_predication) {
+ if (cmd_buffer->qf == RADV_QUEUE_GENERAL && !cmd_buffer->device->physical_device->rad_info.has_32bit_predication) {
uint64_t pred_value = 0, pred_va;
unsigned pred_offset;
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 8);
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
- COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs,
+ COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM);
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, pred_va);
/* VK_EXT_transform_feedback */
VKAPI_ATTR void VKAPI_CALL
-radv_CmdBindTransformFeedbackBuffersEXT(VkCommandBuffer commandBuffer, uint32_t firstBinding,
- uint32_t bindingCount, const VkBuffer *pBuffers,
- const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes)
+radv_CmdBindTransformFeedbackBuffersEXT(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount,
+ const VkBuffer *pBuffers, const VkDeviceSize *pOffsets,
+ const VkDeviceSize *pSizes)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
radeon_set_context_reg_seq(cs, R_028B94_VGT_STRMOUT_CONFIG, 2);
radeon_emit(cs, S_028B94_STREAMOUT_0_EN(streamout_enabled) | S_028B94_RAST_STREAM(0) |
- S_028B94_STREAMOUT_1_EN(streamout_enabled) |
- S_028B94_STREAMOUT_2_EN(streamout_enabled) |
+ S_028B94_STREAMOUT_1_EN(streamout_enabled) | S_028B94_STREAMOUT_2_EN(streamout_enabled) |
S_028B94_STREAMOUT_3_EN(streamout_enabled));
radeon_emit(cs, so->hw_enabled_mask & enabled_stream_buffers_mask);
so->streamout_enabled = enable;
- so->hw_enabled_mask = so->enabled_mask | (so->enabled_mask << 4) | (so->enabled_mask << 8) |
- (so->enabled_mask << 12);
+ so->hw_enabled_mask =
+ so->enabled_mask | (so->enabled_mask << 4) | (so->enabled_mask << 8) | (so->enabled_mask << 12);
if (!cmd_buffer->device->physical_device->use_ngg_streamout &&
((old_streamout_enabled != radv_is_streamout_enabled(cmd_buffer)) ||
radeon_emit(cs, EVENT_TYPE(V_028A90_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0));
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(cs,
- WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
+ radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
radeon_emit(cs, reg_strmout_cntl >> 2); /* register */
radeon_emit(cs, 0);
radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */
radv_flush_vgt_streamout(cmd_buffer);
}
- ASSERTED unsigned cdw_max =
- radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, MAX_SO_BUFFERS * 10);
+ ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, MAX_SO_BUFFERS * 10);
- u_foreach_bit(i, so->enabled_mask)
- {
+ u_foreach_bit (i, so->enabled_mask) {
int32_t counter_buffer_idx = i - firstCounterBuffer;
if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
counter_buffer_idx = -1;
- bool append =
- counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx];
+ bool append = counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx];
uint64_t va = 0;
if (append) {
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
- radeon_emit(cs, S_411_SRC_SEL(append ? V_411_SRC_ADDR_TC_L2 : V_411_DATA) |
- S_411_DST_SEL(V_411_GDS) | S_411_CP_SYNC(i == last_target));
+ radeon_emit(cs, S_411_SRC_SEL(append ? V_411_SRC_ADDR_TC_L2 : V_411_DATA) | S_411_DST_SEL(V_411_GDS) |
+ S_411_CP_SYNC(i == last_target));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, 4 * i); /* destination in GDS */
* SGPRs what to do.
*/
radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16 * i, 2);
- radeon_emit(cs, sb[i].size >> 2); /* BUFFER_SIZE (in DW) */
- radeon_emit(cs, info->so.strides[i]); /* VTX_STRIDE (in DW) */
+ radeon_emit(cs, sb[i].size >> 2); /* BUFFER_SIZE (in DW) */
+ radeon_emit(cs, info->so.strides[i]); /* VTX_STRIDE (in DW) */
cmd_buffer->state.context_roll_without_scissor_emitted = true;
if (append) {
radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
- radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */
- STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, va); /* src address lo */
- radeon_emit(cs, va >> 32); /* src address hi */
+ radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */
+ STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, va); /* src address lo */
+ radeon_emit(cs, va >> 32); /* src address hi */
} else {
/* Start from the beginning. */
radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
- radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */
- STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */
+ STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
}
}
}
}
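/* Usage note (editor's): with a counter buffer bound, the FROM_MEM variant
 * above resumes at the BUFFER_FILLED_SIZE saved by a previous
 * vkCmdEndTransformFeedbackEXT; without one, the FROM_PACKET variant
 * restarts every enabled buffer at offset 0. */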
VKAPI_ATTR void VKAPI_CALL
-radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer,
- uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
- const VkDeviceSize *pCounterBufferOffsets)
+radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer, uint32_t counterBufferCount,
+ const VkBuffer *pCounterBuffers, const VkDeviceSize *pCounterBufferOffsets)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_streamout_state *so = &cmd_buffer->state.streamout;
if (!cmd_buffer->device->physical_device->use_ngg_streamout)
radv_flush_vgt_streamout(cmd_buffer);
- ASSERTED unsigned cdw_max =
- radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, MAX_SO_BUFFERS * 12);
+ ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, MAX_SO_BUFFERS * 12);
- u_foreach_bit(i, so->enabled_mask)
- {
+ u_foreach_bit (i, so->enabled_mask) {
int32_t counter_buffer_idx = i - firstCounterBuffer;
if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
counter_buffer_idx = -1;
- bool append =
- counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx];
+ bool append = counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx];
uint64_t va = 0;
if (append) {
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
if (append) {
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
- radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_PS_DONE, 0,
- EOP_DST_SEL_TC_L2, EOP_DATA_SEL_GDS, va, EOP_DATA_GDS(i, 1), 0);
+ radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_PS_DONE, 0, EOP_DST_SEL_TC_L2,
+ EOP_DATA_SEL_GDS, va, EOP_DATA_GDS(i, 1), 0);
}
} else {
if (append) {
radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */
- STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
- STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */
- radeon_emit(cs, va); /* dst address lo */
- radeon_emit(cs, va >> 32); /* dst address hi */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, 0); /* unused */
+ STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
+ STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */
+ radeon_emit(cs, va); /* dst address lo */
+ radeon_emit(cs, va >> 32); /* dst address hi */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
}
/* Deactivate transform feedback by zeroing the buffer size.
radeon_emit(cs, 1); /* 1 DWORD */
} else {
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) |
- COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | COPY_DATA_WR_CONFIRM);
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanceCount,
- uint32_t firstInstance, VkBuffer _counterBuffer,
- VkDeviceSize counterBufferOffset, uint32_t counterOffset,
+radv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanceCount, uint32_t firstInstance,
+ VkBuffer _counterBuffer, VkDeviceSize counterBufferOffset, uint32_t counterOffset,
uint32_t vertexStride)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
if (!radv_before_draw(cmd_buffer, &info, 1))
return;
- struct VkMultiDrawInfoEXT minfo = { 0, 0 };
+ struct VkMultiDrawInfoEXT minfo = {0, 0};
radv_emit_strmout_buffer(cmd_buffer, &info);
radv_emit_direct_draw_packets(cmd_buffer, &info, 1, &minfo, S_0287F0_USE_OPAQUE(1), 0);
radv_after_draw(cmd_buffer);
/* VK_AMD_buffer_marker */
VKAPI_ATTR void VKAPI_CALL
-radv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage,
- VkBuffer dstBuffer, VkDeviceSize dstOffset, uint32_t marker)
+radv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, VkBuffer dstBuffer,
+ VkDeviceSize dstOffset, uint32_t marker)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_buffer, buffer, dstBuffer);
if (!(stage & ~VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)) {
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
- COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM);
radeon_emit(cs, marker);
radeon_emit(cs, 0);
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
} else {
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
- radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS,
- 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, marker,
- cmd_buffer->gfx9_eop_bug_va);
+ radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
+ EOP_DATA_SEL_VALUE_32BIT, va, marker, cmd_buffer->gfx9_eop_bug_va);
}
assert(cmd_buffer->cs->cdw <= cdw_max);
}
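/* Usage note (editor's): markers for TOP_OF_PIPE are written immediately
 * with CP COPY_DATA, while any later stage goes through a BOTTOM_OF_PIPE_TS
 * EOP event so the value only lands after prior work completes. */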
VKAPI_ATTR void VKAPI_CALL
-radv_CmdBindPipelineShaderGroupNV(VkCommandBuffer commandBuffer,
- VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline,
- uint32_t groupIndex)
+radv_CmdBindPipelineShaderGroupNV(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
+ VkPipeline pipeline, uint32_t groupIndex)
{
fprintf(stderr, "radv: unimplemented vkCmdBindPipelineShaderGroupNV\n");
abort();
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdSetDescriptorBufferOffsetsEXT(VkCommandBuffer commandBuffer,
- VkPipelineBindPoint pipelineBindPoint,
+radv_CmdSetDescriptorBufferOffsetsEXT(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
VkPipelineLayout _layout, uint32_t firstSet, uint32_t setCount,
const uint32_t *pBufferIndices, const VkDeviceSize *pOffsets)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, pipelineBindPoint);
+ struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, pipelineBindPoint);
for (unsigned i = 0; i < setCount; i++) {
unsigned idx = i + firstSet;
- descriptors_state->descriptor_buffers[idx] =
- cmd_buffer->descriptor_buffers[pBufferIndices[i]] + pOffsets[i];
+ descriptors_state->descriptor_buffers[idx] = cmd_buffer->descriptor_buffers[pBufferIndices[i]] + pOffsets[i];
radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, NULL, idx);
}
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdBindDescriptorBufferEmbeddedSamplersEXT(VkCommandBuffer commandBuffer,
- VkPipelineBindPoint pipelineBindPoint,
+radv_CmdBindDescriptorBufferEmbeddedSamplersEXT(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
VkPipelineLayout _layout, uint32_t set)
{
/* This is a no-op because embedded samplers are inlined at compile time. */
#define NUM_DEPTH_CLEAR_PIPELINES 2
#define NUM_DEPTH_DECOMPRESS_PIPELINES 3
-#define MAX_FRAMEBUFFER_WIDTH (1u << 14)
-#define MAX_FRAMEBUFFER_HEIGHT (1u << 14)
+#define MAX_FRAMEBUFFER_WIDTH (1u << 14)
+#define MAX_FRAMEBUFFER_HEIGHT (1u << 14)
/*
 * This is the point at which we switch from using CP to a compute shader
#define RADV_SHADER_ALLOC_MIN_ARENA_SIZE (256 * 1024)
/* 256 KiB << 5 = 8 MiB */
#define RADV_SHADER_ALLOC_MAX_ARENA_SIZE_SHIFT 5u
-#define RADV_SHADER_ALLOC_MIN_SIZE_CLASS 8
-#define RADV_SHADER_ALLOC_MAX_SIZE_CLASS 15
-#define RADV_SHADER_ALLOC_NUM_FREE_LISTS \
- (RADV_SHADER_ALLOC_MAX_SIZE_CLASS - RADV_SHADER_ALLOC_MIN_SIZE_CLASS + 1)
+#define RADV_SHADER_ALLOC_MIN_SIZE_CLASS 8
+#define RADV_SHADER_ALLOC_MAX_SIZE_CLASS 15
+#define RADV_SHADER_ALLOC_NUM_FREE_LISTS (RADV_SHADER_ALLOC_MAX_SIZE_CLASS - RADV_SHADER_ALLOC_MIN_SIZE_CLASS + 1)
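/* Example (editor's note): with the values above the allocator keeps
 * 15 - 8 + 1 = 8 free lists, one per size class. */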
#define PERF_CTR_MAX_PASSES 512
#define PERF_CTR_BO_PASS_OFFSET 16
* offset 20|24|28|32 - generated primitive counter for stream 0|1|2|3
* offset 36|40|44|48 - written primitive counter for stream 0|1|2|3
*/
-#define RADV_NGG_QUERY_PIPELINE_STAT_OFFSET 16
+#define RADV_NGG_QUERY_PIPELINE_STAT_OFFSET 16
#define RADV_NGG_QUERY_PRIM_GEN_OFFSET(stream) (20 + stream * 4)
#define RADV_NGG_QUERY_PRIM_XFB_OFFSET(stream) (36 + stream * 4)
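/* Example (editor's note): for stream 2, PRIM_GEN lives at
 * RADV_NGG_QUERY_PRIM_GEN_OFFSET(2) = 20 + 2 * 4 = 28 and PRIM_XFB at
 * RADV_NGG_QUERY_PRIM_XFB_OFFSET(2) = 36 + 2 * 4 = 44, matching the offset
 * table above. */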
#include "sid.h"
static void
-radv_set_context_reg_array(struct radeon_cmdbuf *cs, unsigned reg, unsigned num,
- const uint32_t *values)
+radv_set_context_reg_array(struct radeon_cmdbuf *cs, unsigned reg, unsigned num, const uint32_t *values)
{
radeon_set_context_reg_seq(cs, reg, num);
radeon_emit_array(cs, values, num);
}
VkResult
-radv_create_shadow_regs_preamble(const struct radv_device *device,
- struct radv_queue_state *queue_state)
+radv_create_shadow_regs_preamble(const struct radv_device *device, struct radv_queue_state *queue_state)
{
struct radeon_winsys *ws = device->ws;
const struct radeon_info *info = &device->physical_device->rad_info;
/* allocate memory for queue_state->shadowed_regs where register states are saved */
result = ws->buffer_create(ws, SI_SHADOWED_REG_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM,
- RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_NO_INTERPROCESS_SHARING,
- RADV_BO_PRIORITY_SCRATCH, 0, &queue_state->shadowed_regs);
+ RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_NO_INTERPROCESS_SHARING, RADV_BO_PRIORITY_SCRATCH, 0,
+ &queue_state->shadowed_regs);
if (result != VK_SUCCESS)
goto fail;
 /* fill the CS with the shadow-regs preamble IB that starts register shadowing */
- ac_create_shadowing_ib_preamble(info, (pm4_cmd_add_fn)&radeon_emit, cs,
- queue_state->shadowed_regs->va, device->pbb_allowed);
+ ac_create_shadowing_ib_preamble(info, (pm4_cmd_add_fn)&radeon_emit, cs, queue_state->shadowed_regs->va,
+ device->pbb_allowed);
while (cs->cdw & 7) {
if (info->gfx_ib_pad_with_type2)
radeon_emit(cs, PKT3_NOP_PAD);
}
- result = ws->buffer_create(ws, cs->cdw * 4, 4096, ws->cs_domain(ws),
- RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC,
- RADV_BO_PRIORITY_CS, 0, &queue_state->shadow_regs_ib);
+ result = ws->buffer_create(
+ ws, cs->cdw * 4, 4096, ws->cs_domain(ws),
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC,
+ RADV_BO_PRIORITY_CS, 0, &queue_state->shadow_regs_ib);
if (result != VK_SUCCESS)
goto fail_ib_buffer;
}
static inline void
-radeon_set_sh_reg_idx(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs,
- unsigned reg, unsigned idx, unsigned value)
+radeon_set_sh_reg_idx(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs, unsigned reg, unsigned idx,
+ unsigned value)
{
assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
assert(cs->cdw + 3 <= cs->reserved_dw);
}
static inline void
-radeon_set_uconfig_reg_idx(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs,
- unsigned reg, unsigned idx, unsigned value)
+radeon_set_uconfig_reg_idx(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs, unsigned reg,
+ unsigned idx, unsigned value)
{
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
assert(cs->cdw + 3 <= cs->reserved_dw);
 * that means it can skip register writes because it does not correctly take the
 * fields of GRBM_GFX_INDEX into account. With this bit we can force the write.
*/
- bool filter_cam_workaround = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10 &&
- cmd_buffer->qf == RADV_QUEUE_GENERAL;
+ bool filter_cam_workaround =
+ cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10 && cmd_buffer->qf == RADV_QUEUE_GENERAL;
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0) | PKT3_RESET_FILTER_CAM(filter_cam_workaround));
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
result = ws->buffer_create(
ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM,
- RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM |
- RADEON_FLAG_VA_UNCACHED, RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->trace_bo);
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_VA_UNCACHED,
+ RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->trace_bo);
if (result != VK_SUCCESS)
return false;
uint32_t value;
if (ws->read_registers(ws, offset, 1, &value))
- ac_dump_reg(f, device->physical_device->rad_info.gfx_level,
- device->physical_device->rad_info.family, offset, value, ~0);
+ ac_dump_reg(f, device->physical_device->rad_info.gfx_level, device->physical_device->rad_info.family, offset,
+ value, ~0);
}
static void
}
static void
-radv_dump_buffer_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family,
- const uint32_t *desc, FILE *f)
+radv_dump_buffer_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, const uint32_t *desc, FILE *f)
{
fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
for (unsigned j = 0; j < 4; j++)
}
static void
-radv_dump_image_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family,
- const uint32_t *desc, FILE *f)
+radv_dump_image_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, const uint32_t *desc, FILE *f)
{
- unsigned sq_img_rsrc_word0 =
- gfx_level >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0;
+ unsigned sq_img_rsrc_word0 = gfx_level >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0;
fprintf(f, COLOR_CYAN " Image:" COLOR_RESET "\n");
for (unsigned j = 0; j < 8; j++)
}
static void
-radv_dump_sampler_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family,
- const uint32_t *desc, FILE *f)
+radv_dump_sampler_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, const uint32_t *desc, FILE *f)
{
fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
for (unsigned j = 0; j < 4; j++) {
}
static void
-radv_dump_combined_image_sampler_descriptor(enum amd_gfx_level gfx_level,
- enum radeon_family family, const uint32_t *desc,
- FILE *f)
+radv_dump_combined_image_sampler_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family,
+ const uint32_t *desc, FILE *f)
{
radv_dump_image_descriptor(gfx_level, family, desc, f);
radv_dump_sampler_descriptor(gfx_level, family, desc + 16, f);
}
static void
-radv_dump_descriptor_set(const struct radv_device *device, const struct radv_descriptor_set *set, unsigned id,
- FILE *f)
+radv_dump_descriptor_set(const struct radv_device *device, const struct radv_descriptor_set *set, unsigned id, FILE *f)
{
enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
enum radeon_family family = device->physical_device->rad_info.family;
/* Split a disassembly string into lines and add them to the array pointed
* to by "instructions". */
static void
-si_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num,
- struct radv_shader_inst *instructions)
+si_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num, struct radv_shader_inst *instructions)
{
struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
char *next;
/* More than 16 chars after ";" means the instruction is 8 bytes long. */
inst->size = next - semicolon > 16 ? 8 : 4;
- snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len,
- " [PC=0x%" PRIx64 ", off=%u, size=%u]", start_addr + inst->offset, inst->offset,
- inst->size);
+ snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len, " [PC=0x%" PRIx64 ", off=%u, size=%u]",
+ start_addr + inst->offset, inst->offset, inst->size);
last_inst = inst;
(*num)++;
}
static void
-radv_dump_annotated_shader(const struct radv_shader *shader, gl_shader_stage stage,
- struct ac_wave_info *waves, unsigned num_waves, FILE *f)
+radv_dump_annotated_shader(const struct radv_shader *shader, gl_shader_stage stage, struct ac_wave_info *waves,
+ unsigned num_waves, FILE *f)
{
uint64_t start_addr, end_addr;
unsigned i;
* Buffer size / 4 is the upper bound of the instruction count.
*/
unsigned num_inst = 0;
- struct radv_shader_inst *instructions =
- calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
+ struct radv_shader_inst *instructions = calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
- fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
- radv_get_shader_name(&shader->info, stage));
+ fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n", radv_get_shader_name(&shader->info, stage));
/* Print instructions with annotations. */
for (i = 0; i < num_inst; i++) {
}
static void
-radv_dump_shader(struct radv_device *device, struct radv_pipeline *pipeline,
- struct radv_shader *shader, gl_shader_stage stage, const char *dump_dir, FILE *f)
+radv_dump_shader(struct radv_device *device, struct radv_pipeline *pipeline, struct radv_shader *shader,
+ gl_shader_stage stage, const char *dump_dir, FILE *f)
{
if (!shader)
return;
fprintf(f, "NIR:\n%s\n", shader->nir_string);
}
- fprintf(f, "%s IR:\n%s\n", device->physical_device->use_llvm ? "LLVM" : "ACO",
- shader->ir_string);
+ fprintf(f, "%s IR:\n%s\n", device->physical_device->use_llvm ? "LLVM" : "ACO", shader->ir_string);
fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
radv_dump_shader_stats(device, pipeline, shader, stage, f);
}
static void
-radv_dump_vertex_descriptors(const struct radv_device *device,
- const struct radv_graphics_pipeline *pipeline, FILE *f)
+radv_dump_vertex_descriptors(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, FILE *f)
{
struct radv_shader *vs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX);
void *ptr = (uint64_t *)device->trace_id_ptr;
if (!count)
return;
- fprintf(f, "Num vertex %s: %d\n",
- vs->info.vs.use_per_attribute_vb_descs ? "attributes" : "bindings", count);
+ fprintf(f, "Num vertex %s: %d\n", vs->info.vs.use_per_attribute_vb_descs ? "attributes" : "bindings", count);
for (uint32_t i = 0; i < count; i++) {
uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
uint64_t va = 0;
}
static void
-radv_dump_vs_prolog(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline,
- FILE *f)
+radv_dump_vs_prolog(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, FILE *f)
{
struct radv_shader_part *vs_prolog = radv_get_saved_vs_prolog(device);
struct radv_shader *vs_shader = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX);
pipeline = radv_get_saved_pipeline(queue->device, ring);
if (pipeline) {
if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
- struct radv_graphics_pipeline *graphics_pipeline =
- radv_pipeline_to_graphics(pipeline);
+ struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
radv_dump_vs_prolog(device, graphics_pipeline, f);
while (stages) {
int stage = u_bit_scan(&stages);
- radv_dump_shader(device, &graphics_pipeline->base, graphics_pipeline->base.shaders[stage],
- stage, dump_dir, f);
+ radv_dump_shader(device, &graphics_pipeline->base, graphics_pipeline->base.shaders[stage], stage, dump_dir,
+ f);
}
} else if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
for (unsigned i = 0; i < rt_pipeline->stage_count; i++) {
if (radv_ray_tracing_stage_is_compiled(&rt_pipeline->stages[i])) {
- struct radv_shader *shader =
- container_of(rt_pipeline->stages[i].shader, struct radv_shader, base);
+ struct radv_shader *shader = container_of(rt_pipeline->stages[i].shader, struct radv_shader, base);
radv_dump_shader(device, pipeline, shader, shader->info.stage, dump_dir, f);
}
}
- radv_dump_shader(device, pipeline, pipeline->shaders[MESA_SHADER_INTERSECTION],
- MESA_SHADER_INTERSECTION, dump_dir, f);
+ radv_dump_shader(device, pipeline, pipeline->shaders[MESA_SHADER_INTERSECTION], MESA_SHADER_INTERSECTION,
+ dump_dir, f);
} else {
- struct radv_compute_pipeline *compute_pipeline =
- radv_pipeline_to_compute(pipeline);
+ struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
radv_dump_shader(device, &compute_pipeline->base, compute_pipeline->base.shaders[MESA_SHADER_COMPUTE],
MESA_SHADER_COMPUTE, dump_dir, f);
fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves);
if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
- struct radv_graphics_pipeline *graphics_pipeline =
- radv_pipeline_to_graphics(pipeline);
+ struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
/* Dump annotated active graphics shaders. */
unsigned stages = graphics_pipeline->active_stages;
while (stages) {
int stage = u_bit_scan(&stages);
- radv_dump_annotated_shader(graphics_pipeline->base.shaders[stage], stage, waves,
- num_waves, f);
+ radv_dump_annotated_shader(graphics_pipeline->base.shaders[stage], stage, waves, num_waves, f);
}
} else if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
for (unsigned i = 0; i < rt_pipeline->stage_count; i++) {
if (radv_ray_tracing_stage_is_compiled(&rt_pipeline->stages[i])) {
- struct radv_shader *shader =
- container_of(rt_pipeline->stages[i].shader, struct radv_shader, base);
+ struct radv_shader *shader = container_of(rt_pipeline->stages[i].shader, struct radv_shader, base);
radv_dump_annotated_shader(shader, shader->info.stage, waves, num_waves, f);
}
}
- radv_dump_annotated_shader(pipeline->shaders[MESA_SHADER_INTERSECTION],
- MESA_SHADER_INTERSECTION, waves, num_waves, f);
+ radv_dump_annotated_shader(pipeline->shaders[MESA_SHADER_INTERSECTION], MESA_SHADER_INTERSECTION, waves,
+ num_waves, f);
} else {
- struct radv_compute_pipeline *compute_pipeline =
- radv_pipeline_to_compute(pipeline);
+ struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
- radv_dump_annotated_shader(compute_pipeline->base.shaders[MESA_SHADER_COMPUTE],
- MESA_SHADER_COMPUTE, waves, num_waves, f);
+ radv_dump_annotated_shader(compute_pipeline->base.shaders[MESA_SHADER_COMPUTE], MESA_SHADER_COMPUTE, waves,
+ num_waves, f);
}
/* Print waves executing shaders that are not currently bound. */
fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n");
found = true;
}
- fprintf(f,
- " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016" PRIx64 " INST=%08X %08X PC=%" PRIx64
- "\n",
+ fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016" PRIx64 " INST=%08X %08X PC=%" PRIx64 "\n",
waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec,
waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc);
}
}
if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
- struct radv_graphics_pipeline *graphics_pipeline =
- radv_pipeline_to_graphics(pipeline);
+ struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
radv_dump_vertex_descriptors(device, graphics_pipeline, f);
}
radv_dump_descriptors(queue->device, f);
fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name);
fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version);
fprintf(f, "API version: %d.%d.%d\n", VK_VERSION_MAJOR(instance->vk.app_info.api_version),
- VK_VERSION_MINOR(instance->vk.app_info.api_version),
- VK_VERSION_PATCH(instance->vk.app_info.api_version));
+ VK_VERSION_MINOR(instance->vk.app_info.api_version), VK_VERSION_PATCH(instance->vk.app_info.api_version));
radv_dump_enabled_options(device, f);
}
#endif
#ifdef _WIN32
- fprintf(f, "Device name: %s (DRM %i.%i.%i)\n\n", device->physical_device->marketing_name,
- info->drm_major, info->drm_minor, info->drm_patchlevel);
+ fprintf(f, "Device name: %s (DRM %i.%i.%i)\n\n", device->physical_device->marketing_name, info->drm_major,
+ info->drm_minor, info->drm_patchlevel);
#else
if (uname(&uname_data) == 0)
snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release);
- fprintf(f, "Device name: %s (DRM %i.%i.%i%s)\n\n", device->physical_device->marketing_name,
- info->drm_major, info->drm_minor, info->drm_patchlevel, kernel_version);
+ fprintf(f, "Device name: %s (DRM %i.%i.%i%s)\n\n", device->physical_device->marketing_name, info->drm_major,
+ info->drm_minor, info->drm_patchlevel, kernel_version);
#endif
}
if (ring != AMD_IP_GFX)
return;
- sprintf(cmd, "umr -RS %s 2>&1",
- device->physical_device->rad_info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
+ sprintf(cmd, "umr -RS %s 2>&1", device->physical_device->rad_info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
fprintf(f, "\nUMR GFX ring:\n\n");
radv_dump_cmd(cmd, f);
bool hang_occurred = radv_gpu_hang_occurred(queue, ring);
bool vm_fault_occurred = false;
if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS)
- vm_fault_occurred = ac_vm_fault_occurred(device->physical_device->rad_info.gfx_level,
- &device->dmesg_timestamp, &addr);
+ vm_fault_occurred =
+ ac_vm_fault_occurred(device->physical_device->rad_info.gfx_level, &device->dmesg_timestamp, &addr);
if (!hang_occurred && !vm_fault_occurred)
return;
timep = os_localtime(&raw_time, &result);
strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep);
- snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."),
- getpid(), buf_time);
+ snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."), getpid(),
+ buf_time);
if (mkdir(dump_dir, 0774) && errno != EEXIST) {
fprintf(stderr, "radv: can't create directory '%s' (%i).\n", dump_dir, errno);
abort();
if (result != VK_SUCCESS)
return false;
- result = ws->buffer_create(ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM,
- RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT,
- RADV_BO_PRIORITY_SCRATCH, 0, &device->tma_bo);
+ result = ws->buffer_create(
+ ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT,
+ RADV_BO_PRIORITY_SCRATCH, 0, &device->tma_bo);
if (result != VK_SUCCESS)
return false;
* Buffer size / 4 is the upper bound of the instruction count.
*/
unsigned num_inst = 0;
- struct radv_shader_inst *instructions =
- calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
+ struct radv_shader_inst *instructions = calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
/* Split the disassembly string into instructions. */
si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
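/* Review note (annotation, not part of the patch): the "code_size / 4" bound
 * above holds because GCN/RDNA instructions are encoded in either 4 or 8
 * bytes, so a shader of code_size bytes contains at most code_size / 4
 * instructions and the calloc'd radv_shader_inst array can never overflow. */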
fprintf(stderr, "\nHardware registers:\n");
if (device->physical_device->rad_info.gfx_level >= GFX10) {
- ac_dump_reg(stderr, gfx_level, family, R_000408_SQ_WAVE_STATUS,
- regs->status, ~0);
- ac_dump_reg(stderr, gfx_level, family, R_00040C_SQ_WAVE_TRAPSTS,
- regs->trap_sts, ~0);
- ac_dump_reg(stderr, gfx_level, family, R_00045C_SQ_WAVE_HW_ID1,
- regs->hw_id, ~0);
- ac_dump_reg(stderr, gfx_level, family, R_00041C_SQ_WAVE_IB_STS,
- regs->ib_sts, ~0);
+ ac_dump_reg(stderr, gfx_level, family, R_000408_SQ_WAVE_STATUS, regs->status, ~0);
+ ac_dump_reg(stderr, gfx_level, family, R_00040C_SQ_WAVE_TRAPSTS, regs->trap_sts, ~0);
+ ac_dump_reg(stderr, gfx_level, family, R_00045C_SQ_WAVE_HW_ID1, regs->hw_id, ~0);
+ ac_dump_reg(stderr, gfx_level, family, R_00041C_SQ_WAVE_IB_STS, regs->ib_sts, ~0);
} else {
- ac_dump_reg(stderr, gfx_level, family, R_000048_SQ_WAVE_STATUS,
- regs->status, ~0);
- ac_dump_reg(stderr, gfx_level, family, R_00004C_SQ_WAVE_TRAPSTS,
- regs->trap_sts, ~0);
- ac_dump_reg(stderr, gfx_level, family, R_000050_SQ_WAVE_HW_ID,
- regs->hw_id, ~0);
- ac_dump_reg(stderr, gfx_level, family, R_00005C_SQ_WAVE_IB_STS,
- regs->ib_sts, ~0);
+ ac_dump_reg(stderr, gfx_level, family, R_000048_SQ_WAVE_STATUS, regs->status, ~0);
+ ac_dump_reg(stderr, gfx_level, family, R_00004C_SQ_WAVE_TRAPSTS, regs->trap_sts, ~0);
+ ac_dump_reg(stderr, gfx_level, family, R_000050_SQ_WAVE_HW_ID, regs->hw_id, ~0);
+ ac_dump_reg(stderr, gfx_level, family, R_00005C_SQ_WAVE_IB_STS, regs->ib_sts, ~0);
}
fprintf(stderr, "\n\n");
}
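/* Review note (annotation, not part of the patch): the two branches reflect
 * the GFX10 register remap -- the per-wave SQ_WAVE_STATUS/TRAPSTS/IB_STS
 * registers moved to the 0x400-range offsets and HW_ID was replaced by
 * HW_ID1, while GFX6-9 reads the same state from the legacy 0x48-0x5C
 * offsets. The dumped fields are otherwise analogous. */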
uint8_t pc_rewind = (ttmp1 >> 25) & 0xf;
uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4);
- fprintf(stderr, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht,
- pc_rewind);
+ fprintf(stderr, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht, pc_rewind);
radv_dump_faulty_shader(device, pc);
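/* Review note (annotation, not part of the patch): ttmp0 carries PC[31:0]
 * and ttmp1[15:0] carries PC[47:32]; ttmp1[28:25] is a rewind count in
 * 4-byte dwords. E.g. with pc_rewind == 3 the raw value is moved back
 * 3 * 4 = 12 bytes, so the PC handed to radv_dump_faulty_shader points at
 * (or near) the instruction the wave was executing when the trap fired. */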
radv_descriptor_type_buffer_count(VkDescriptorType type)
{
switch (type) {
- case VK_DESCRIPTOR_TYPE_SAMPLER:
- case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
- case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
- return 0;
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
- return 3;
- default:
- return 1;
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
+ case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
+ return 0;
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
+ return 3;
+ default:
+ return 1;
}
}
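/* Review note (annotation, not part of the patch): these per-type counts
 * feed the winsys BO bookkeeping. A rough usage sketch, mirroring the
 * pool-sizing loop later in this file:
 *
 *    bo_count += radv_descriptor_type_buffer_count(pCreateInfo->pPoolSizes[i].type) *
 *                pCreateInfo->pPoolSizes[i].descriptorCount;
 *
 * so e.g. 4 combined image samplers reserve 4 * 3 = 12 buffer pointers,
 * while plain samplers and acceleration structures reserve none. */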
if (!samplers)
return false;
for (uint32_t i = 1; i < count; ++i) {
- if (memcmp(radv_sampler_from_handle(samplers[0])->state,
- radv_sampler_from_handle(samplers[i])->state, 16)) {
+ if (memcmp(radv_sampler_from_handle(samplers[0])->state, radv_sampler_from_handle(samplers[i])->state, 16)) {
return false;
}
}
}
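/* Review note (annotation, not part of the patch): a sampler's packed
 * hardware state is four dwords, so checking that an immutable-sampler array
 * is uniform reduces to a 16-byte memcmp of every element against element 0,
 * as above. */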
static bool
-radv_mutable_descriptor_type_size_alignment(const VkMutableDescriptorTypeListVALVE *list,
- uint64_t *out_size, uint64_t *out_align)
+radv_mutable_descriptor_type_size_alignment(const VkMutableDescriptorTypeListVALVE *list, uint64_t *out_size,
+ uint64_t *out_align)
{
uint32_t max_size = 0;
uint32_t max_align = 0;
VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
- VkDescriptorSetLayout *pSetLayout)
+ const VkAllocationCallbacks *pAllocator, VkDescriptorSetLayout *pSetLayout)
{
RADV_FROM_HANDLE(radv_device, device, _device);
struct radv_descriptor_set_layout *set_layout;
bool has_ycbcr_sampler = false;
for (unsigned i = 0; i < pCreateInfo->pBindings[j].descriptorCount; ++i) {
- if (radv_sampler_from_handle(pCreateInfo->pBindings[j].pImmutableSamplers[i])
- ->ycbcr_sampler)
+ if (radv_sampler_from_handle(pCreateInfo->pBindings[j].pImmutableSamplers[i])->ycbcr_sampler)
has_ycbcr_sampler = true;
}
ycbcr_sampler_offsets = samplers + 4 * immutable_sampler_count;
set_layout->ycbcr_sampler_offsets_offset = (char *)ycbcr_sampler_offsets - (char *)set_layout;
- uintptr_t first_ycbcr_sampler_offset =
- (uintptr_t)ycbcr_sampler_offsets + sizeof(uint32_t) * num_bindings;
- first_ycbcr_sampler_offset =
- ALIGN(first_ycbcr_sampler_offset, alignof(struct vk_ycbcr_conversion_state));
+ uintptr_t first_ycbcr_sampler_offset = (uintptr_t)ycbcr_sampler_offsets + sizeof(uint32_t) * num_bindings;
+ first_ycbcr_sampler_offset = ALIGN(first_ycbcr_sampler_offset, alignof(struct vk_ycbcr_conversion_state));
ycbcr_samplers = (struct vk_ycbcr_conversion_state *)first_ycbcr_sampler_offset;
} else
set_layout->ycbcr_sampler_offsets_offset = 0;
VkDescriptorSetLayoutBinding *bindings = NULL;
- VkResult result =
- vk_create_sorted_bindings(pCreateInfo->pBindings, pCreateInfo->bindingCount, &bindings);
+ VkResult result = vk_create_sorted_bindings(pCreateInfo->pBindings, pCreateInfo->bindingCount, &bindings);
if (result != VK_SUCCESS) {
vk_descriptor_set_layout_unref(&device->vk, &set_layout->vk);
return vk_error(device, result);
uint32_t first_alignment = 32;
if (pCreateInfo->bindingCount > 0) {
- uint32_t last_alignment =
- radv_descriptor_alignment(bindings[pCreateInfo->bindingCount - 1].descriptorType);
- if (bindings[pCreateInfo->bindingCount - 1].descriptorType ==
- VK_DESCRIPTOR_TYPE_MUTABLE_EXT) {
+ uint32_t last_alignment = radv_descriptor_alignment(bindings[pCreateInfo->bindingCount - 1].descriptorType);
+ if (bindings[pCreateInfo->bindingCount - 1].descriptorType == VK_DESCRIPTOR_TYPE_MUTABLE_EXT) {
uint64_t mutable_size = 0, mutable_align = 0;
radv_mutable_descriptor_type_size_alignment(
- &mutable_info->pMutableDescriptorTypeLists[pCreateInfo->bindingCount - 1],
- &mutable_size, &mutable_align);
+ &mutable_info->pMutableDescriptorTypeLists[pCreateInfo->bindingCount - 1], &mutable_size, &mutable_align);
last_alignment = mutable_align;
}
/* main image + fmask */
uint32_t max_sampled_image_descriptors = 2;
- if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER &&
- binding->pImmutableSamplers) {
+ if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER && binding->pImmutableSamplers) {
for (unsigned i = 0; i < binding->descriptorCount; ++i) {
struct vk_ycbcr_conversion *conversion =
radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler;
if (conversion) {
has_ycbcr_sampler = true;
max_sampled_image_descriptors =
- MAX2(max_sampled_image_descriptors,
- vk_format_get_plane_count(conversion->state.format));
+ MAX2(max_sampled_image_descriptors, vk_format_get_plane_count(conversion->state.format));
}
}
}
break;
case VK_DESCRIPTOR_TYPE_MUTABLE_EXT: {
uint64_t mutable_size = 0, mutable_align = 0;
- radv_mutable_descriptor_type_size_alignment(
- &mutable_info->pMutableDescriptorTypeLists[j], &mutable_size, &mutable_align);
+ radv_mutable_descriptor_type_size_alignment(&mutable_info->pMutableDescriptorTypeLists[j], &mutable_size,
+ &mutable_align);
assert(mutable_size && mutable_align);
set_layout->binding[b].size = mutable_size;
alignment = mutable_align;
break;
}
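/* Review note (annotation, not part of the patch): layout construction runs
 * two passes over the sorted bindings -- pass 0 places only the bindings
 * whose alignment equals first_alignment, pass 1 places the rest -- so the
 * condition below merely skips the bindings that belong to the other pass. */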
- if ((pass == 0 && alignment != first_alignment) ||
- (pass == 1 && alignment == first_alignment))
+ if ((pass == 0 && alignment != first_alignment) || (pass == 1 && alignment == first_alignment))
continue;
set_layout->size = align(set_layout->size, alignment);
set_layout->binding[b].dynamic_offset_offset = dynamic_offset_count;
if (variable_flags && binding->binding < variable_flags->bindingCount &&
- (variable_flags->pBindingFlags[binding->binding] &
- VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)) {
- assert(
- !binding->pImmutableSamplers); /* Terribly ill-defined how many samplers are valid */
+ (variable_flags->pBindingFlags[binding->binding] & VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)) {
+ assert(!binding->pImmutableSamplers); /* Terribly ill-defined how many samplers are valid */
assert(binding->binding == num_bindings - 1);
set_layout->has_variable_descriptors = true;
/* Do not optimize space for descriptor buffers and embedded samplers, otherwise the set
* layout size/offset are incorrect.
*/
- if (!(pCreateInfo->flags &
- (VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT |
- VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT))) {
- set_layout->binding[b].immutable_samplers_equal = has_equal_immutable_samplers(
- binding->pImmutableSamplers, binding->descriptorCount);
+ if (!(pCreateInfo->flags & (VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT |
+ VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT))) {
+ set_layout->binding[b].immutable_samplers_equal =
+ has_equal_immutable_samplers(binding->pImmutableSamplers, binding->descriptorCount);
}
for (uint32_t i = 0; i < binding->descriptorCount; i++)
- memcpy(samplers + 4 * i,
- &radv_sampler_from_handle(binding->pImmutableSamplers[i])->state, 16);
+ memcpy(samplers + 4 * i, &radv_sampler_from_handle(binding->pImmutableSamplers[i])->state, 16);
/* Don't reserve space for the samplers if they're not accessed. */
if (set_layout->binding[b].immutable_samplers_equal) {
ycbcr_sampler_offsets[b] = (const char *)ycbcr_samplers - (const char *)set_layout;
for (uint32_t i = 0; i < binding->descriptorCount; i++) {
if (radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler)
- ycbcr_samplers[i] = radv_sampler_from_handle(binding->pImmutableSamplers[i])
- ->ycbcr_sampler->state;
+ ycbcr_samplers[i] = radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler->state;
else
ycbcr_samplers[i].format = VK_FORMAT_UNDEFINED;
}
* carefully constructed to not have pointers so a full hash instead of a per-field hash
* should be ok.
*/
- uint32_t hash_offset =
- offsetof(struct radv_descriptor_set_layout, hash) + sizeof(set_layout->hash);
+ uint32_t hash_offset = offsetof(struct radv_descriptor_set_layout, hash) + sizeof(set_layout->hash);
_mesa_sha1_compute((const char *)set_layout + hash_offset, size - hash_offset, set_layout->hash);
*pSetLayout = radv_descriptor_set_layout_to_handle(set_layout);
}
VKAPI_ATTR void VKAPI_CALL
-radv_GetDescriptorSetLayoutSupport(VkDevice device,
- const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
+radv_GetDescriptorSetLayoutSupport(VkDevice device, const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
VkDescriptorSetLayoutSupport *pSupport)
{
VkDescriptorSetLayoutBinding *bindings = NULL;
- VkResult result =
- vk_create_sorted_bindings(pCreateInfo->pBindings, pCreateInfo->bindingCount, &bindings);
+ VkResult result = vk_create_sorted_bindings(pCreateInfo->pBindings, pCreateInfo->bindingCount, &bindings);
if (result != VK_SUCCESS) {
pSupport->supported = false;
return;
const VkDescriptorSetLayoutBindingFlagsCreateInfo *variable_flags =
vk_find_struct_const(pCreateInfo->pNext, DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO);
- VkDescriptorSetVariableDescriptorCountLayoutSupport *variable_count = vk_find_struct(
- pSupport->pNext, DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT);
+ VkDescriptorSetVariableDescriptorCountLayoutSupport *variable_count =
+ vk_find_struct(pSupport->pNext, DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT);
const VkMutableDescriptorTypeCreateInfoEXT *mutable_info =
vk_find_struct_const(pCreateInfo->pNext, MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT);
if (variable_count) {
uint32_t first_alignment = 32;
if (pCreateInfo->bindingCount > 0) {
- uint32_t last_alignment =
- radv_descriptor_alignment(bindings[pCreateInfo->bindingCount - 1].descriptorType);
- if (bindings[pCreateInfo->bindingCount - 1].descriptorType ==
- VK_DESCRIPTOR_TYPE_MUTABLE_EXT) {
+ uint32_t last_alignment = radv_descriptor_alignment(bindings[pCreateInfo->bindingCount - 1].descriptorType);
+ if (bindings[pCreateInfo->bindingCount - 1].descriptorType == VK_DESCRIPTOR_TYPE_MUTABLE_EXT) {
uint64_t mutable_size = 0, mutable_align = 0;
radv_mutable_descriptor_type_size_alignment(
- &mutable_info->pMutableDescriptorTypeLists[pCreateInfo->bindingCount - 1],
- &mutable_size, &mutable_align);
+ &mutable_info->pMutableDescriptorTypeLists[pCreateInfo->bindingCount - 1], &mutable_size, &mutable_align);
last_alignment = mutable_align;
}
descriptor_count = 1;
break;
case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
- if (!radv_mutable_descriptor_type_size_alignment(
- &mutable_info->pMutableDescriptorTypeLists[i], &descriptor_size,
- &descriptor_alignment)) {
+ if (!radv_mutable_descriptor_type_size_alignment(&mutable_info->pMutableDescriptorTypeLists[i],
+ &descriptor_size, &descriptor_alignment)) {
supported = false;
}
break;
supported = false;
}
if (variable_flags && binding->binding < variable_flags->bindingCount && variable_count &&
- (variable_flags->pBindingFlags[binding->binding] &
- VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)) {
+ (variable_flags->pBindingFlags[binding->binding] & VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)) {
variable_count->maxVariableDescriptorCount = MIN2(UINT32_MAX, max_count);
}
size += descriptor_count * descriptor_size;
* just multiple descriptor set layouts pasted together.
*/
void
-radv_pipeline_layout_init(struct radv_device *device, struct radv_pipeline_layout *layout,
- bool independent_sets)
+radv_pipeline_layout_init(struct radv_device *device, struct radv_pipeline_layout *layout, bool independent_sets)
{
memset(layout, 0, sizeof(*layout));
VKAPI_ATTR VkResult VKAPI_CALL
radv_CreatePipelineLayout(VkDevice _device, const VkPipelineLayoutCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
- VkPipelineLayout *pPipelineLayout)
+ const VkAllocationCallbacks *pAllocator, VkPipelineLayout *pPipelineLayout)
{
RADV_FROM_HANDLE(radv_device, device, _device);
struct radv_pipeline_layout *layout;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO);
- layout = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*layout), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ layout = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*layout), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (layout == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
- radv_pipeline_layout_init(device, layout,
- pCreateInfo->flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT);
+ radv_pipeline_layout_init(device, layout, pCreateInfo->flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT);
layout->num_sets = pCreateInfo->setLayoutCount;
}
VKAPI_ATTR void VKAPI_CALL
-radv_DestroyPipelineLayout(VkDevice _device, VkPipelineLayout _pipelineLayout,
- const VkAllocationCallbacks *pAllocator)
+radv_DestroyPipelineLayout(VkDevice _device, VkPipelineLayout _pipelineLayout, const VkAllocationCallbacks *pAllocator)
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, _pipelineLayout);
struct radv_descriptor_set *set;
uint32_t buffer_count = layout->buffer_count;
if (variable_count) {
- unsigned stride =
- radv_descriptor_type_buffer_count(layout->binding[layout->binding_count - 1].type);
- buffer_count =
- layout->binding[layout->binding_count - 1].buffer_offset + *variable_count * stride;
+ unsigned stride = radv_descriptor_type_buffer_count(layout->binding[layout->binding_count - 1].type);
+ buffer_count = layout->binding[layout->binding_count - 1].buffer_offset + *variable_count * stride;
}
- unsigned range_offset =
- sizeof(struct radv_descriptor_set_header) + sizeof(struct radeon_winsys_bo *) * buffer_count;
+ unsigned range_offset = sizeof(struct radv_descriptor_set_header) + sizeof(struct radeon_winsys_bo *) * buffer_count;
const unsigned dynamic_offset_count = layout->dynamic_offset_count;
- unsigned mem_size =
- range_offset + sizeof(struct radv_descriptor_range) * dynamic_offset_count;
+ unsigned mem_size = range_offset + sizeof(struct radv_descriptor_range) * dynamic_offset_count;
if (pool->host_memory_base) {
if (pool->host_memory_end - pool->host_memory_ptr < mem_size)
vk_object_base_init(&device->vk, &set->header.base, VK_OBJECT_TYPE_DESCRIPTOR_SET);
if (dynamic_offset_count) {
- set->header.dynamic_descriptors =
- (struct radv_descriptor_range *)((uint8_t *)set + range_offset);
+ set->header.dynamic_descriptors = (struct radv_descriptor_range *)((uint8_t *)set + range_offset);
}
set->header.layout = layout;
uint32_t layout_size = layout->size;
if (variable_count) {
uint32_t stride = layout->binding[layout->binding_count - 1].size;
- if (layout->binding[layout->binding_count - 1].type ==
- VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
+ if (layout->binding[layout->binding_count - 1].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
stride = 1;
layout_size = layout->binding[layout->binding_count - 1].offset + *variable_count * stride;
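/* Review note (annotation, not part of the patch): for
 * VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK the variable descriptor count is
 * expressed in bytes rather than array elements (per the Vulkan spec), which
 * is why the stride was collapsed to 1 above. */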
set->header.bo = pool->bo;
set->header.mapped_ptr = (uint32_t *)(pool->mapped_ptr + offset);
set->header.va = pool->bo ? (radv_buffer_get_va(set->header.bo) + offset) : 0;
- memmove(&pool->entries[index + 1], &pool->entries[index],
- sizeof(pool->entries[0]) * (pool->entry_count - index));
+ memmove(&pool->entries[index + 1], &pool->entries[index], sizeof(pool->entries[0]) * (pool->entry_count - index));
pool->entries[index].offset = offset;
pool->entries[index].size = layout_size;
pool->entries[index].set = set;
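/* Review note (annotation, not part of the patch): the memmove above opens a
 * slot at `index`, keeping pool->entries densely packed (and, by the look of
 * the offset bookkeeping, ordered by buffer offset); the free path further
 * below undoes this with a mirror memmove and an entry_count decrement. */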
if (layout->has_immutable_samplers) {
for (unsigned i = 0; i < layout->binding_count; ++i) {
- if (!layout->binding[i].immutable_samplers_offset ||
- layout->binding[i].immutable_samplers_equal)
+ if (!layout->binding[i].immutable_samplers_offset || layout->binding[i].immutable_samplers_equal)
continue;
unsigned offset = layout->binding[i].offset / 4;
if (free_bo && !pool->host_memory_base) {
for (int i = 0; i < pool->entry_count; ++i) {
if (pool->entries[i].set == set) {
- memmove(&pool->entries[i], &pool->entries[i + 1],
- sizeof(pool->entries[i]) * (pool->entry_count - i - 1));
+ memmove(&pool->entries[i], &pool->entries[i + 1], sizeof(pool->entries[i]) * (pool->entry_count - i - 1));
--pool->entry_count;
break;
}
}
VkResult
-radv_create_descriptor_pool(struct radv_device *device,
- const VkDescriptorPoolCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
- VkDescriptorPool *pDescriptorPool, bool is_internal)
+radv_create_descriptor_pool(struct radv_device *device, const VkDescriptorPoolCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkDescriptorPool *pDescriptorPool,
+ bool is_internal)
{
struct radv_descriptor_pool *pool;
uint64_t size = sizeof(struct radv_descriptor_pool);
const VkMutableDescriptorTypeCreateInfoEXT *mutable_info =
vk_find_struct_const(pCreateInfo->pNext, MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT);
- vk_foreach_struct_const(ext, pCreateInfo->pNext)
- {
+ vk_foreach_struct_const (ext, pCreateInfo->pNext) {
switch (ext->sType) {
case VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO: {
const VkDescriptorPoolInlineUniformBlockCreateInfo *info =
uint64_t num_16byte_descriptors = 0;
for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) {
bo_count += radv_descriptor_type_buffer_count(pCreateInfo->pPoolSizes[i].type) *
- pCreateInfo->pPoolSizes[i].descriptorCount;
+ pCreateInfo->pPoolSizes[i].descriptorCount;
switch (pCreateInfo->pPoolSizes[i].type) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
* we must allocate enough for any supported mutable descriptor type, i.e. 64 bytes. */
if (mutable_info && i < mutable_info->mutableDescriptorTypeListCount) {
uint64_t mutable_size, mutable_alignment;
- if (radv_mutable_descriptor_type_size_alignment(
- &mutable_info->pMutableDescriptorTypeLists[i], &mutable_size,
- &mutable_alignment)) {
+ if (radv_mutable_descriptor_type_size_alignment(&mutable_info->pMutableDescriptorTypeLists[i],
+ &mutable_size, &mutable_alignment)) {
/* 32 as we may need to align for images */
mutable_size = align(mutable_size, 32);
bo_size += mutable_size * pCreateInfo->pPoolSizes[i].descriptorCount;
if (bo_size) {
if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_VALVE)) {
- enum radeon_bo_flag flags = RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY |
- RADEON_FLAG_32BIT;
+ enum radeon_bo_flag flags = RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT;
if (device->instance->zero_vram)
flags |= RADEON_FLAG_ZERO_VRAM;
- VkResult result = device->ws->buffer_create(
- device->ws, bo_size, 32, RADEON_DOMAIN_VRAM, flags, RADV_BO_PRIORITY_DESCRIPTOR, 0,
- &pool->bo);
+ VkResult result = device->ws->buffer_create(device->ws, bo_size, 32, RADEON_DOMAIN_VRAM, flags,
+ RADV_BO_PRIORITY_DESCRIPTOR, 0, &pool->bo);
if (result != VK_SUCCESS) {
radv_destroy_descriptor_pool(device, pAllocator, pool);
return vk_error(device, result);
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
} else {
- pool->host_bo =
- vk_alloc2(&device->vk.alloc, pAllocator, bo_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ pool->host_bo = vk_alloc2(&device->vk.alloc, pAllocator, bo_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!pool->host_bo) {
radv_destroy_descriptor_pool(device, pAllocator, pool);
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateDescriptorPool(VkDevice _device, const VkDescriptorPoolCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
- VkDescriptorPool *pDescriptorPool)
+ const VkAllocationCallbacks *pAllocator, VkDescriptorPool *pDescriptorPool)
{
RADV_FROM_HANDLE(radv_device, device, _device);
return radv_create_descriptor_pool(device, pCreateInfo, pAllocator, pDescriptorPool, false);
}
VKAPI_ATTR void VKAPI_CALL
-radv_DestroyDescriptorPool(VkDevice _device, VkDescriptorPool _pool,
- const VkAllocationCallbacks *pAllocator)
+radv_DestroyDescriptorPool(VkDevice _device, VkDescriptorPool _pool, const VkAllocationCallbacks *pAllocator)
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_descriptor_pool, pool, _pool);
}
VKAPI_ATTR VkResult VKAPI_CALL
-radv_ResetDescriptorPool(VkDevice _device, VkDescriptorPool descriptorPool,
- VkDescriptorPoolResetFlags flags)
+radv_ResetDescriptorPool(VkDevice _device, VkDescriptorPool descriptorPool, VkDescriptorPoolResetFlags flags)
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_descriptor_pool, pool, descriptorPool);
uint32_t i;
struct radv_descriptor_set *set = NULL;
- const VkDescriptorSetVariableDescriptorCountAllocateInfo *variable_counts = vk_find_struct_const(
- pAllocateInfo->pNext, DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO);
+ const VkDescriptorSetVariableDescriptorCountAllocateInfo *variable_counts =
+ vk_find_struct_const(pAllocateInfo->pNext, DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO);
const uint32_t zero = 0;
/* allocate a set of buffers for each shader to contain descriptors */
}
static ALWAYS_INLINE void
-write_texel_buffer_descriptor(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
- unsigned *dst, struct radeon_winsys_bo **buffer_list,
- const VkBufferView _buffer_view)
+write_texel_buffer_descriptor(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, unsigned *dst,
+ struct radeon_winsys_bo **buffer_list, const VkBufferView _buffer_view)
{
RADV_FROM_HANDLE(radv_buffer_view, buffer_view, _buffer_view);
return;
}
- uint32_t rsrc_word3 =
- S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+ uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->rad_info.gfx_level >= GFX11) {
- rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
+ rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
} else if (device->physical_device->rad_info.gfx_level >= GFX10) {
- rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
+ rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
+ S_008F0C_RESOURCE_LEVEL(1);
} else {
- rsrc_word3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ rsrc_word3 |=
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
}
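/* Review note (annotation, not part of the patch): rsrc_word3 is the last
 * dword of the 4-dword buffer resource descriptor. Every path selects the
 * identity XYZW swizzle; the branches then encode the 32-bit float format
 * per generation -- a unified FORMAT field on GFX11, FORMAT plus
 * RESOURCE_LEVEL(1) on GFX10, and the older split NUM_FORMAT/DATA_FORMAT
 * fields (with no OOB_SELECT) before GFX10. */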
dst[0] = va;
}
static ALWAYS_INLINE void
-write_buffer_descriptor_impl(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
- unsigned *dst, struct radeon_winsys_bo **buffer_list,
- const VkDescriptorBufferInfo *buffer_info)
+write_buffer_descriptor_impl(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, unsigned *dst,
+ struct radeon_winsys_bo **buffer_list, const VkDescriptorBufferInfo *buffer_info)
{
RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);
uint64_t va = 0, range = 0;
static ALWAYS_INLINE void
write_dynamic_buffer_descriptor(struct radv_device *device, struct radv_descriptor_range *range,
- struct radeon_winsys_bo **buffer_list,
- const VkDescriptorBufferInfo *buffer_info)
+ struct radeon_winsys_bo **buffer_list, const VkDescriptorBufferInfo *buffer_info)
{
RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);
uint64_t va;
}
static ALWAYS_INLINE void
-write_image_descriptor_impl(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
- unsigned size, unsigned *dst, struct radeon_winsys_bo **buffer_list,
- VkDescriptorType descriptor_type, const VkDescriptorImageInfo *image_info)
+write_image_descriptor_impl(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, unsigned size,
+ unsigned *dst, struct radeon_winsys_bo **buffer_list, VkDescriptorType descriptor_type,
+ const VkDescriptorImageInfo *image_info)
{
RADV_FROM_HANDLE(radv_image_view, iview, image_info->imageView);
return;
}
- const uint32_t max_bindings = sizeof(iview->image->bindings) /
- sizeof(iview->image->bindings[0]);
+ const uint32_t max_bindings = sizeof(iview->image->bindings) / sizeof(iview->image->bindings[0]);
for (uint32_t b = 0; b < max_bindings; b++) {
if (cmd_buffer) {
if (iview->image->bindings[b].bo)
}
static ALWAYS_INLINE void
-write_combined_image_sampler_descriptor(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer, unsigned sampler_offset,
- unsigned *dst, struct radeon_winsys_bo **buffer_list,
- VkDescriptorType descriptor_type,
- const VkDescriptorImageInfo *image_info, bool has_sampler)
+write_combined_image_sampler_descriptor(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
+ unsigned sampler_offset, unsigned *dst, struct radeon_winsys_bo **buffer_list,
+ VkDescriptorType descriptor_type, const VkDescriptorImageInfo *image_info,
+ bool has_sampler)
{
- write_image_descriptor_impl(device, cmd_buffer, sampler_offset, dst, buffer_list, descriptor_type,
- image_info);
+ write_image_descriptor_impl(device, cmd_buffer, sampler_offset, dst, buffer_list, descriptor_type, image_info);
/* copy over sampler state */
if (has_sampler) {
RADV_FROM_HANDLE(radv_sampler, sampler, image_info->sampler);
static ALWAYS_INLINE void
radv_update_descriptor_sets_impl(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
VkDescriptorSet dstSetOverride, uint32_t descriptorWriteCount,
- const VkWriteDescriptorSet *pDescriptorWrites,
- uint32_t descriptorCopyCount,
+ const VkWriteDescriptorSet *pDescriptorWrites, uint32_t descriptorCopyCount,
const VkCopyDescriptorSet *pDescriptorCopies)
{
uint32_t i, j;
for (i = 0; i < descriptorWriteCount; i++) {
const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i];
- RADV_FROM_HANDLE(radv_descriptor_set, set,
- dstSetOverride ? dstSetOverride : writeset->dstSet);
+ RADV_FROM_HANDLE(radv_descriptor_set, set, dstSetOverride ? dstSetOverride : writeset->dstSet);
const struct radv_descriptor_set_binding_layout *binding_layout =
set->header.layout->binding + writeset->dstBinding;
uint32_t *ptr = set->header.mapped_ptr;
* allocated, so if we are writing push descriptors we have to copy the
* immutable samplers into them now.
*/
- const bool copy_immutable_samplers = cmd_buffer &&
- binding_layout->immutable_samplers_offset &&
- !binding_layout->immutable_samplers_equal;
+ const bool copy_immutable_samplers =
+ cmd_buffer && binding_layout->immutable_samplers_offset && !binding_layout->immutable_samplers_equal;
const uint32_t *samplers = radv_immutable_samplers(set->header.layout, binding_layout);
const VkWriteDescriptorSetAccelerationStructureKHR *accel_structs = NULL;
ptr += binding_layout->offset / 4;
if (writeset->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
- write_block_descriptor(device, cmd_buffer, (uint8_t *)ptr + writeset->dstArrayElement,
- writeset);
+ write_block_descriptor(device, cmd_buffer, (uint8_t *)ptr + writeset->dstArrayElement, writeset);
continue;
} else if (writeset->descriptorType == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
- accel_structs =
- vk_find_struct_const(writeset->pNext, WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR);
+ accel_structs = vk_find_struct_const(writeset->pNext, WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR);
}
ptr += binding_layout->size * writeset->dstArrayElement / 4;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
unsigned idx = writeset->dstArrayElement + j;
idx += binding_layout->dynamic_offset_offset;
- assert(!(set->header.layout->flags &
- VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
- write_dynamic_buffer_descriptor(device, set->header.dynamic_descriptors + idx,
- buffer_list, writeset->pBufferInfo + j);
+ assert(!(set->header.layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
+ write_dynamic_buffer_descriptor(device, set->header.dynamic_descriptors + idx, buffer_list,
+ writeset->pBufferInfo + j);
break;
}
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- write_buffer_descriptor_impl(device, cmd_buffer, ptr, buffer_list,
- writeset->pBufferInfo + j);
+ write_buffer_descriptor_impl(device, cmd_buffer, ptr, buffer_list, writeset->pBufferInfo + j);
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- write_texel_buffer_descriptor(device, cmd_buffer, ptr, buffer_list,
- writeset->pTexelBufferView[j]);
+ write_texel_buffer_descriptor(device, cmd_buffer, ptr, buffer_list, writeset->pTexelBufferView[j]);
break;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- write_image_descriptor_impl(device, cmd_buffer, 32, ptr, buffer_list,
- writeset->descriptorType, writeset->pImageInfo + j);
+ write_image_descriptor_impl(device, cmd_buffer, 32, ptr, buffer_list, writeset->descriptorType,
+ writeset->pImageInfo + j);
break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- write_image_descriptor_impl(device, cmd_buffer, 64, ptr, buffer_list,
- writeset->descriptorType, writeset->pImageInfo + j);
+ write_image_descriptor_impl(device, cmd_buffer, 64, ptr, buffer_list, writeset->descriptorType,
+ writeset->pImageInfo + j);
break;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: {
unsigned sampler_offset = radv_combined_image_descriptor_sampler_offset(binding_layout);
- write_combined_image_sampler_descriptor(
- device, cmd_buffer, sampler_offset, ptr, buffer_list, writeset->descriptorType,
- writeset->pImageInfo + j, !binding_layout->immutable_samplers_offset);
+ write_combined_image_sampler_descriptor(device, cmd_buffer, sampler_offset, ptr, buffer_list,
+ writeset->descriptorType, writeset->pImageInfo + j,
+ !binding_layout->immutable_samplers_offset);
if (copy_immutable_samplers) {
const unsigned idx = writeset->dstArrayElement + j;
memcpy((char *)ptr + sampler_offset, samplers + 4 * idx, 16);
}
break;
case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: {
- RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct,
- accel_structs->pAccelerationStructures[j]);
+ RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, accel_structs->pAccelerationStructures[j]);
- write_accel_struct(device, ptr,
- accel_struct ? vk_acceleration_structure_get_va(accel_struct) : 0);
+ write_accel_struct(device, ptr, accel_struct ? vk_acceleration_structure_get_va(accel_struct) : 0);
break;
}
default:
VKAPI_ATTR void VKAPI_CALL
radv_UpdateDescriptorSets(VkDevice _device, uint32_t descriptorWriteCount,
- const VkWriteDescriptorSet *pDescriptorWrites,
- uint32_t descriptorCopyCount,
+ const VkWriteDescriptorSet *pDescriptorWrites, uint32_t descriptorCopyCount,
const VkCopyDescriptorSet *pDescriptorCopies)
{
RADV_FROM_HANDLE(radv_device, device, _device);
- radv_update_descriptor_sets_impl(device, NULL, VK_NULL_HANDLE, descriptorWriteCount,
- pDescriptorWrites, descriptorCopyCount, pDescriptorCopies);
+ radv_update_descriptor_sets_impl(device, NULL, VK_NULL_HANDLE, descriptorWriteCount, pDescriptorWrites,
+ descriptorCopyCount, pDescriptorCopies);
}
void
radv_cmd_update_descriptor_sets(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
VkDescriptorSet dstSetOverride, uint32_t descriptorWriteCount,
- const VkWriteDescriptorSet *pDescriptorWrites,
- uint32_t descriptorCopyCount,
+ const VkWriteDescriptorSet *pDescriptorWrites, uint32_t descriptorCopyCount,
const VkCopyDescriptorSet *pDescriptorCopies)
{
/* Assume cmd_buffer != NULL to optimize out cmd_buffer checks in generic code above. */
assume(cmd_buffer != NULL);
- radv_update_descriptor_sets_impl(device, cmd_buffer, dstSetOverride, descriptorWriteCount,
- pDescriptorWrites, descriptorCopyCount, pDescriptorCopies);
+ radv_update_descriptor_sets_impl(device, cmd_buffer, dstSetOverride, descriptorWriteCount, pDescriptorWrites,
+ descriptorCopyCount, pDescriptorCopies);
}
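/* Review note (annotation, not part of the patch): because
 * radv_update_descriptor_sets_impl is ALWAYS_INLINE, each caller specializes
 * it -- radv_UpdateDescriptorSets inlines a copy with cmd_buffer == NULL,
 * while radv_cmd_update_descriptor_sets above inlines one where
 * assume(cmd_buffer != NULL) lets the compiler fold away the per-descriptor
 * cmd_buffer checks. */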
VKAPI_ATTR VkResult VKAPI_CALL
-radv_CreateDescriptorUpdateTemplate(VkDevice _device,
- const VkDescriptorUpdateTemplateCreateInfo *pCreateInfo,
+radv_CreateDescriptorUpdateTemplate(VkDevice _device, const VkDescriptorUpdateTemplateCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkDescriptorUpdateTemplate *pDescriptorUpdateTemplate)
{
for (i = 0; i < entry_count; i++) {
const VkDescriptorUpdateTemplateEntry *entry = &pCreateInfo->pDescriptorUpdateEntries[i];
- const struct radv_descriptor_set_binding_layout *binding_layout =
- set_layout->binding + entry->dstBinding;
+ const struct radv_descriptor_set_binding_layout *binding_layout = set_layout->binding + entry->dstBinding;
const uint32_t buffer_offset = binding_layout->buffer_offset + entry->dstArrayElement;
const uint32_t *immutable_samplers = NULL;
uint32_t dst_offset;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
case VK_DESCRIPTOR_TYPE_SAMPLER:
/* Immutable samplers are copied into push descriptors when they are pushed */
- if (pCreateInfo->templateType ==
- VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR &&
- binding_layout->immutable_samplers_offset &&
- !binding_layout->immutable_samplers_equal) {
- immutable_samplers =
- radv_immutable_samplers(set_layout, binding_layout) + entry->dstArrayElement * 4;
+ if (pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR &&
+ binding_layout->immutable_samplers_offset && !binding_layout->immutable_samplers_equal) {
+ immutable_samplers = radv_immutable_samplers(set_layout, binding_layout) + entry->dstArrayElement * 4;
}
break;
default:
}
VKAPI_ATTR void VKAPI_CALL
-radv_DestroyDescriptorUpdateTemplate(VkDevice _device,
- VkDescriptorUpdateTemplate descriptorUpdateTemplate,
+radv_DestroyDescriptorUpdateTemplate(VkDevice _device, VkDescriptorUpdateTemplate descriptorUpdateTemplate,
const VkAllocationCallbacks *pAllocator)
{
RADV_FROM_HANDLE(radv_device, device, _device);
}
static ALWAYS_INLINE void
-radv_update_descriptor_set_with_template_impl(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
+radv_update_descriptor_set_with_template_impl(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
struct radv_descriptor_set *set,
- VkDescriptorUpdateTemplate descriptorUpdateTemplate,
- const void *pData)
+ VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData)
{
RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate);
uint32_t i;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
const unsigned idx = templ->entry[i].dst_offset + j;
- assert(!(set->header.layout->flags &
- VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
- write_dynamic_buffer_descriptor(device, set->header.dynamic_descriptors + idx,
- buffer_list, (struct VkDescriptorBufferInfo *)pSrc);
+ assert(!(set->header.layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
+ write_dynamic_buffer_descriptor(device, set->header.dynamic_descriptors + idx, buffer_list,
+ (struct VkDescriptorBufferInfo *)pSrc);
break;
}
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- write_buffer_descriptor_impl(device, cmd_buffer, pDst, buffer_list,
- (struct VkDescriptorBufferInfo *)pSrc);
+ write_buffer_descriptor_impl(device, cmd_buffer, pDst, buffer_list, (struct VkDescriptorBufferInfo *)pSrc);
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- write_texel_buffer_descriptor(device, cmd_buffer, pDst, buffer_list,
- *(VkBufferView *)pSrc);
+ write_texel_buffer_descriptor(device, cmd_buffer, pDst, buffer_list, *(VkBufferView *)pSrc);
break;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- write_image_descriptor_impl(device, cmd_buffer, 32, pDst, buffer_list,
- templ->entry[i].descriptor_type,
+ write_image_descriptor_impl(device, cmd_buffer, 32, pDst, buffer_list, templ->entry[i].descriptor_type,
(struct VkDescriptorImageInfo *)pSrc);
break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- write_image_descriptor_impl(device, cmd_buffer, 64, pDst, buffer_list,
- templ->entry[i].descriptor_type,
+ write_image_descriptor_impl(device, cmd_buffer, 64, pDst, buffer_list, templ->entry[i].descriptor_type,
(struct VkDescriptorImageInfo *)pSrc);
break;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- write_combined_image_sampler_descriptor(
- device, cmd_buffer, templ->entry[i].sampler_offset, pDst, buffer_list,
- templ->entry[i].descriptor_type, (struct VkDescriptorImageInfo *)pSrc,
- templ->entry[i].has_sampler);
+ write_combined_image_sampler_descriptor(device, cmd_buffer, templ->entry[i].sampler_offset, pDst,
+ buffer_list, templ->entry[i].descriptor_type,
+ (struct VkDescriptorImageInfo *)pSrc, templ->entry[i].has_sampler);
if (cmd_buffer && templ->entry[i].immutable_samplers) {
- memcpy((char *)pDst + templ->entry[i].sampler_offset,
- templ->entry[i].immutable_samplers + 4 * j, 16);
+ memcpy((char *)pDst + templ->entry[i].sampler_offset, templ->entry[i].immutable_samplers + 4 * j, 16);
}
break;
case VK_DESCRIPTOR_TYPE_SAMPLER:
if (templ->entry[i].has_sampler) {
const VkDescriptorImageInfo *pImageInfo = (struct VkDescriptorImageInfo *)pSrc;
write_sampler_descriptor(pDst, pImageInfo->sampler);
- }
- else if (cmd_buffer && templ->entry[i].immutable_samplers)
+ } else if (cmd_buffer && templ->entry[i].immutable_samplers)
memcpy(pDst, templ->entry[i].immutable_samplers + 4 * j, 16);
break;
case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: {
- RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct,
- *(const VkAccelerationStructureKHR *)pSrc);
- write_accel_struct(device, pDst,
- accel_struct ? vk_acceleration_structure_get_va(accel_struct) : 0);
+ RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, *(const VkAccelerationStructureKHR *)pSrc);
+ write_accel_struct(device, pDst, accel_struct ? vk_acceleration_structure_get_va(accel_struct) : 0);
break;
}
default:
}
void
-radv_cmd_update_descriptor_set_with_template(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
+radv_cmd_update_descriptor_set_with_template(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
struct radv_descriptor_set *set,
- VkDescriptorUpdateTemplate descriptorUpdateTemplate,
- const void *pData)
+ VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData)
{
/* Assume cmd_buffer != NULL to optimize out cmd_buffer checks in generic code above. */
assume(cmd_buffer != NULL);
VKAPI_ATTR void VKAPI_CALL
radv_UpdateDescriptorSetWithTemplate(VkDevice _device, VkDescriptorSet descriptorSet,
- VkDescriptorUpdateTemplate descriptorUpdateTemplate,
- const void *pData)
+ VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData)
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_descriptor_set, set, descriptorSet);
}
VKAPI_ATTR void VKAPI_CALL
-radv_GetDescriptorSetLayoutHostMappingInfoVALVE(
- VkDevice _device, const VkDescriptorSetBindingReferenceVALVE *pBindingReference,
- VkDescriptorSetLayoutHostMappingInfoVALVE *pHostMapping)
+radv_GetDescriptorSetLayoutHostMappingInfoVALVE(VkDevice _device,
+ const VkDescriptorSetBindingReferenceVALVE *pBindingReference,
+ VkDescriptorSetLayoutHostMappingInfoVALVE *pHostMapping)
{
struct radv_descriptor_set_layout *set_layout =
radv_descriptor_set_layout_from_handle(pBindingReference->descriptorSetLayout);
- const struct radv_descriptor_set_binding_layout *binding_layout =
- set_layout->binding + pBindingReference->binding;
+ const struct radv_descriptor_set_binding_layout *binding_layout = set_layout->binding + pBindingReference->binding;
pHostMapping->descriptorOffset = binding_layout->offset;
pHostMapping->descriptorSize = binding_layout->size;
}
VKAPI_ATTR void VKAPI_CALL
-radv_GetDescriptorSetHostMappingVALVE(VkDevice _device, VkDescriptorSet descriptorSet,
- void **ppData)
+radv_GetDescriptorSetHostMappingVALVE(VkDevice _device, VkDescriptorSet descriptorSet, void **ppData)
{
RADV_FROM_HANDLE(radv_descriptor_set, set, descriptorSet);
*ppData = set->header.mapped_ptr;
/* VK_EXT_descriptor_buffer */
VKAPI_ATTR void VKAPI_CALL
-radv_GetDescriptorSetLayoutSizeEXT(VkDevice device, VkDescriptorSetLayout layout,
- VkDeviceSize *pLayoutSizeInBytes)
+radv_GetDescriptorSetLayoutSizeEXT(VkDevice device, VkDescriptorSetLayout layout, VkDeviceSize *pLayoutSizeInBytes)
{
RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, layout);
*pLayoutSizeInBytes = set_layout->size;
}
VKAPI_ATTR void VKAPI_CALL
-radv_GetDescriptorSetLayoutBindingOffsetEXT(VkDevice device, VkDescriptorSetLayout layout,
- uint32_t binding, VkDeviceSize *pOffset)
+radv_GetDescriptorSetLayoutBindingOffsetEXT(VkDevice device, VkDescriptorSetLayout layout, uint32_t binding,
+ VkDeviceSize *pOffset)
{
RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, layout);
*pOffset = set_layout->binding[binding].offset;
}
VKAPI_ATTR void VKAPI_CALL
-radv_GetDescriptorEXT(VkDevice _device, const VkDescriptorGetInfoEXT *pDescriptorInfo,
- size_t dataSize, void *pDescriptor)
+radv_GetDescriptorEXT(VkDevice _device, const VkDescriptorGetInfoEXT *pDescriptorInfo, size_t dataSize,
+ void *pDescriptor)
{
RADV_FROM_HANDLE(radv_device, device, _device);
write_sampler_descriptor(pDescriptor, *pDescriptorInfo->data.pSampler);
break;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- write_image_descriptor(pDescriptor, 64, pDescriptorInfo->type,
- pDescriptorInfo->data.pCombinedImageSampler);
+ write_image_descriptor(pDescriptor, 64, pDescriptorInfo->type, pDescriptorInfo->data.pCombinedImageSampler);
if (pDescriptorInfo->data.pCombinedImageSampler) {
- write_sampler_descriptor((uint32_t *)pDescriptor + 20,
- pDescriptorInfo->data.pCombinedImageSampler->sampler);
+ write_sampler_descriptor((uint32_t *)pDescriptor + 20, pDescriptorInfo->data.pCombinedImageSampler->sampler);
}
break;
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- write_image_descriptor(pDescriptor, 64, pDescriptorInfo->type,
- pDescriptorInfo->data.pInputAttachmentImage);
+ write_image_descriptor(pDescriptor, 64, pDescriptorInfo->type, pDescriptorInfo->data.pInputAttachmentImage);
break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- write_image_descriptor(pDescriptor, 64, pDescriptorInfo->type,
- pDescriptorInfo->data.pSampledImage);
+ write_image_descriptor(pDescriptor, 64, pDescriptorInfo->type, pDescriptorInfo->data.pSampledImage);
break;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- write_image_descriptor(pDescriptor, 32, pDescriptorInfo->type,
- pDescriptorInfo->data.pStorageImage);
+ write_image_descriptor(pDescriptor, 32, pDescriptorInfo->type, pDescriptorInfo->data.pStorageImage);
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
const VkDescriptorAddressInfoEXT *addr_info = pDescriptorInfo->data.pUniformBuffer;
const VkDescriptorAddressInfoEXT *addr_info = pDescriptorInfo->data.pUniformTexelBuffer;
if (addr_info && addr_info->address) {
- radv_make_texel_buffer_descriptor(device, addr_info->address, addr_info->format, 0,
- addr_info->range, pDescriptor);
+ radv_make_texel_buffer_descriptor(device, addr_info->address, addr_info->format, 0, addr_info->range,
+ pDescriptor);
} else {
memset(pDescriptor, 0, 4 * 4);
}
const VkDescriptorAddressInfoEXT *addr_info = pDescriptorInfo->data.pStorageTexelBuffer;
if (addr_info && addr_info->address) {
- radv_make_texel_buffer_descriptor(device, addr_info->address, addr_info->format, 0,
- addr_info->range, pDescriptor);
+ radv_make_texel_buffer_descriptor(device, addr_info->address, addr_info->format, 0, addr_info->range,
+ pDescriptor);
} else {
memset(pDescriptor, 0, 4 * 4);
}
}
static inline unsigned
-radv_combined_image_descriptor_sampler_offset(
- const struct radv_descriptor_set_binding_layout *binding)
+radv_combined_image_descriptor_sampler_offset(const struct radv_descriptor_set_binding_layout *binding)
{
return binding->size - ((!binding->immutable_samplers_equal) ? 16 : 0);
}
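/* Review note (annotation, not part of the patch): for combined
 * image+sampler bindings the 16 bytes of sampler state sit at the end of
 * each element, hence the "size - 16"; when all immutable samplers in a
 * binding are identical no per-element sampler is stored (see the "Don't
 * reserve space" path earlier) and the offset equals the full element size. */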
if (!set->ycbcr_sampler_offsets_offset)
return NULL;
- const uint32_t *offsets =
- (const uint32_t *)((const char *)set + set->ycbcr_sampler_offsets_offset);
+ const uint32_t *offsets = (const uint32_t *)((const char *)set + set->ycbcr_sampler_offsets_offset);
if (offsets[binding_index] == 0)
return NULL;
struct radv_device;
-void radv_pipeline_layout_init(struct radv_device *device, struct radv_pipeline_layout *layout,
- bool independent_sets);
+void radv_pipeline_layout_init(struct radv_device *device, struct radv_pipeline_layout *layout, bool independent_sets);
void radv_pipeline_layout_add_set(struct radv_pipeline_layout *layout, uint32_t set_idx,
struct radv_descriptor_set_layout *set_layout);
void radv_pipeline_layout_hash(struct radv_pipeline_layout *layout);
#include "util/os_time.h"
#include "util/timespec.h"
#include "util/u_atomic.h"
+#include "vulkan/vk_icd.h"
#include "winsys/null/radv_null_winsys_public.h"
#include "git_sha1.h"
#include "sid.h"
#include "vk_format.h"
#include "vk_sync.h"
#include "vk_sync_dummy.h"
-#include "vulkan/vk_icd.h"
#ifdef LLVM_AVAILABLE
#include "ac_llvm_util.h"
}
VKAPI_ATTR VkResult VKAPI_CALL
-radv_GetMemoryHostPointerPropertiesEXT(
- VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType, const void *pHostPointer,
- VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
+radv_GetMemoryHostPointerPropertiesEXT(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType,
+ const void *pHostPointer,
+ VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
{
RADV_FROM_HANDLE(radv_device, device, _device);
{
VkResult result;
- result = device->ws->buffer_create(
- device->ws, RADV_BORDER_COLOR_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM,
- RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_NO_INTERPROCESS_SHARING,
- RADV_BO_PRIORITY_SHADER, 0, &device->border_color_data.bo);
+ result =
+ device->ws->buffer_create(device->ws, RADV_BORDER_COLOR_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_NO_INTERPROCESS_SHARING,
+ RADV_BO_PRIORITY_SHADER, 0, &device->border_color_data.bo);
if (result != VK_SUCCESS)
return vk_error(device, result);
if (!prolog)
return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- assert(idx ==
- radv_instance_rate_prolog_index(num_attributes, state.instance_rate_inputs));
+ assert(idx == radv_instance_rate_prolog_index(num_attributes, state.instance_rate_inputs));
device->instance_rate_vs_prologs[idx++] = prolog;
}
}
radv_device_finish_vs_prologs(struct radv_device *device)
{
if (device->vs_prologs) {
- hash_table_foreach(device->vs_prologs, entry)
- {
+ hash_table_foreach (device->vs_prologs, entry) {
free((void *)entry->key);
radv_shader_part_unref(device, entry->data);
}
radv_device_finish_ps_epilogs(struct radv_device *device)
{
if (device->ps_epilogs) {
- hash_table_foreach(device->ps_epilogs, entry)
- {
+ hash_table_foreach (device->ps_epilogs, entry) {
free((void *)entry->key);
radv_shader_part_unref(device, entry->data);
}
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
};
- result = radv_image_create(radv_device_to_handle(device),
- &(struct radv_image_create_info){.vk_info = &image_create_info},
- &device->meta_state.alloc, &image, true);
+ result =
+ radv_image_create(radv_device_to_handle(device), &(struct radv_image_create_info){.vk_info = &image_create_info},
+ &device->meta_state.alloc, &image, true);
if (result != VK_SUCCESS)
return result;
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
};
- result =
- radv_create_buffer(device, &buffer_create_info, &device->meta_state.alloc, &buffer, true);
+ result = radv_create_buffer(device, &buffer_create_info, &device->meta_state.alloc, &buffer, true);
if (result != VK_SUCCESS)
goto fail_create;
if (result != VK_SUCCESS)
goto fail_alloc;
- VkBindBufferMemoryInfo bind_info = {
- .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
- .buffer = buffer,
- .memory = mem,
- .memoryOffset = 0
- };
+ VkBindBufferMemoryInfo bind_info = {.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
+ .buffer = buffer,
+ .memory = mem,
+ .memoryOffset = 0};
result = radv_BindBufferMemory2(radv_device_to_handle(device), 1, &bind_info);
if (result != VK_SUCCESS)
radv_FreeMemory(radv_device_to_handle(device), radv_device_memory_to_handle(device->vrs.mem),
&device->meta_state.alloc);
radv_DestroyBuffer(radv_device_to_handle(device), radv_buffer_to_handle(device->vrs.buffer),
- &device->meta_state.alloc);
- radv_DestroyImage(radv_device_to_handle(device), radv_image_to_handle(device->vrs.image),
- &device->meta_state.alloc);
+ &device->meta_state.alloc);
+ radv_DestroyImage(radv_device_to_handle(device), radv_image_to_handle(device->vrs.image), &device->meta_state.alloc);
}
static enum radv_force_vrs
while (!notifier->quit) {
const char *file = radv_get_force_vrs_config_file();
- struct timespec tm = { .tv_nsec = 100000000 }; /* 100ms */
+ struct timespec tm = {.tv_nsec = 100000000}; /* 100ms */
int length, i = 0;
length = read(notifier->fd, buf, BUF_LEN);
};
static void
-add_entrypoints(struct dispatch_table_builder *b,
- const struct vk_device_entrypoint_table *entrypoints,
+add_entrypoints(struct dispatch_table_builder *b, const struct vk_device_entrypoint_table *entrypoints,
enum radv_dispatch_table table)
{
for (int32_t i = table - 1; i >= RADV_DEVICE_DISPATCH_TABLE; i--) {
robust_buffer_access = true;
}
- vk_foreach_struct_const(ext, pCreateInfo->pNext)
- {
+ vk_foreach_struct_const (ext, pCreateInfo->pNext) {
switch (ext->sType) {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: {
const VkPhysicalDeviceFeatures2 *features = (const void *)ext;
}
case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
const VkDeviceMemoryOverallocationCreateInfoAMD *overallocation = (const void *)ext;
- if (overallocation->overallocationBehavior ==
- VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD)
+ if (overallocation->overallocationBehavior == VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD)
overallocation_disallowed = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
- const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features =
- (const void *)ext;
+ const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features = (const void *)ext;
custom_border_colors = border_color_features->customBorderColors;
break;
}
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
const VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features = (const void *)ext;
- if (features->shaderImageFloat32Atomics ||
- features->sparseImageFloat32Atomics)
+ if (features->shaderImageFloat32Atomics || features->sparseImageFloat32Atomics)
image_float32_atomics = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_2_FEATURES_EXT: {
const VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *features = (const void *)ext;
- if (features->shaderImageFloat32AtomicMinMax ||
- features->sparseImageFloat32AtomicMinMax)
+ if (features->shaderImageFloat32AtomicMinMax || features->sparseImageFloat32AtomicMinMax)
image_float32_atomics = true;
break;
}
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVES_GENERATED_QUERY_FEATURES_EXT: {
const VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *features = (const void *)ext;
- if (features->primitivesGeneratedQuery ||
- features->primitivesGeneratedQueryWithRasterizerDiscard ||
+ if (features->primitivesGeneratedQuery || features->primitivesGeneratedQueryWithRasterizerDiscard ||
features->primitivesGeneratedQueryWithNonZeroStreams)
primitives_generated_query = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_FEATURES_EXT: {
const VkPhysicalDeviceExtendedDynamicState3FeaturesEXT *features = (const void *)ext;
- if (features->extendedDynamicState3ColorBlendEnable ||
- features->extendedDynamicState3ColorWriteMask ||
- features->extendedDynamicState3AlphaToCoverageEnable ||
- features->extendedDynamicState3ColorBlendEquation)
+ if (features->extendedDynamicState3ColorBlendEnable || features->extendedDynamicState3ColorWriteMask ||
+ features->extendedDynamicState3AlphaToCoverageEnable || features->extendedDynamicState3ColorBlendEquation)
ps_epilogs = true;
break;
}
/* With update after bind we can't attach bo's to the command buffer
* from the descriptor set anymore, so we have to use a global BO list.
*/
- device->use_global_bo_list = global_bo_list ||
- (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) ||
+ device->use_global_bo_list = global_bo_list || (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) ||
device->vk.enabled_extensions.EXT_descriptor_indexing ||
device->vk.enabled_extensions.EXT_buffer_device_address ||
device->vk.enabled_extensions.KHR_buffer_device_address ||
const VkDeviceQueueGlobalPriorityCreateInfoKHR *global_priority =
vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
- device->queues[qfi] =
- vk_alloc(&device->vk.alloc, queue_create->queueCount * sizeof(struct radv_queue), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ device->queues[qfi] = vk_alloc(&device->vk.alloc, queue_create->queueCount * sizeof(struct radv_queue), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!device->queues[qfi]) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail_queue;
}
device->private_sdma_queue = VK_NULL_HANDLE;
- device->shader_use_invisible_vram =
- (device->instance->perftest_flags & RADV_PERFTEST_DMA_SHADERS) &&
- /* SDMA buffer copy is only implemented for GFX7+. */
- device->physical_device->rad_info.gfx_level >= GFX7;
+ device->shader_use_invisible_vram = (device->instance->perftest_flags & RADV_PERFTEST_DMA_SHADERS) &&
+ /* SDMA buffer copy is only implemented for GFX7+. */
+ device->physical_device->rad_info.gfx_level >= GFX7;
result = radv_init_shader_upload_queue(device);
if (result != VK_SUCCESS)
goto fail;
- device->pbb_allowed = device->physical_device->rad_info.gfx_level >= GFX9 &&
- !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
+ device->pbb_allowed =
+ device->physical_device->rad_info.gfx_level >= GFX9 && !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
/* The maximum number of scratch waves. Scratch space isn't divided
* evenly between CUs. The number is only a function of the number of CUs.
* async compute). I've seen ~2% performance difference between 4 and 32.
*/
uint32_t max_threads_per_block = 2048;
- device->scratch_waves =
- MAX2(32 * physical_device->rad_info.num_cu, max_threads_per_block / 64);
+ device->scratch_waves = MAX2(32 * physical_device->rad_info.num_cu, max_threads_per_block / 64);
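Worked example for the MAX2 above: on a 40-CU part this evaluates to
MAX2(32 * 40, 2048 / 64) = MAX2(1280, 32) = 1280 scratch waves, so the CU-count
term dominates on any realistic GPU and max_threads_per_block / 64 only acts as
a floor for very small configurations.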
device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
* The kernel may not support preemption, but PAL always sets this bit,
* so let's also set it here for consistency.
*/
- device->dispatch_initiator_task =
- device->dispatch_initiator | S_00B800_DISABLE_DISP_PREMPT_EN(1);
+ device->dispatch_initiator_task = device->dispatch_initiator | S_00B800_DISABLE_DISP_PREMPT_EN(1);
if (device->instance->debug_flags & RADV_DEBUG_HANG) {
/* Enable GPU hangs detection and dump logs if a GPU hang is
goto fail;
}
- fprintf(stderr,
- "*****************************************************************************\n");
- fprintf(stderr,
- "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n");
- fprintf(stderr,
- "*****************************************************************************\n");
+ fprintf(stderr, "*****************************************************************************\n");
+ fprintf(stderr, "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n");
+ fprintf(stderr, "*****************************************************************************\n");
/* Wait for idle after every draw/dispatch to identify the
* first bad call.
}
if (radv_sqtt_enabled()) {
- if (device->physical_device->rad_info.gfx_level < GFX8 ||
- device->physical_device->rad_info.gfx_level > GFX11) {
+ if (device->physical_device->rad_info.gfx_level < GFX8 || device->physical_device->rad_info.gfx_level > GFX11) {
fprintf(stderr, "GPU hardware not supported: refer to "
"the RGP documentation for the list of "
"supported GPUs!\n");
fprintf(stderr,
"radv: Thread trace support is enabled (initial buffer size: %u MiB, "
"instruction timing: %s, cache counters: %s).\n",
- device->sqtt.buffer_size / (1024 * 1024),
- radv_is_instruction_timing_enabled() ? "enabled" : "disabled",
+ device->sqtt.buffer_size / (1024 * 1024), radv_is_instruction_timing_enabled() ? "enabled" : "disabled",
radv_spm_trace_enabled() ? "enabled" : "disabled");
if (radv_spm_trace_enabled()) {
goto fail;
}
} else {
- fprintf(stderr, "radv: SPM isn't supported for this GPU (%s)!\n",
- device->physical_device->name);
+ fprintf(stderr, "radv: SPM isn't supported for this GPU (%s)!\n", device->physical_device->name);
}
}
}
device->force_aniso = MIN2(16, (int)debug_get_num_option("RADV_TEX_ANISO", -1));
if (device->force_aniso >= 0) {
- fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",
- 1 << util_logbase2(device->force_aniso));
+ fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n", 1 << util_logbase2(device->force_aniso));
}
if (use_perf_counters) {
size_t bo_size = PERF_CTR_BO_PASS_OFFSET + sizeof(uint64_t) * PERF_CTR_MAX_PASSES;
- result =
- device->ws->buffer_create(device->ws, bo_size, 4096, RADEON_DOMAIN_GTT,
- RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
- RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->perf_counter_bo);
+ result = device->ws->buffer_create(device->ws, bo_size, 4096, RADEON_DOMAIN_GTT,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
+ RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->perf_counter_bo);
if (result != VK_SUCCESS)
goto fail_cache;
- device->perf_counter_lock_cs =
- calloc(sizeof(struct radeon_winsys_cs *), 2 * PERF_CTR_MAX_PASSES);
+ device->perf_counter_lock_cs = calloc(sizeof(struct radeon_winsys_cs *), 2 * PERF_CTR_MAX_PASSES);
if (!device->perf_counter_lock_cs) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail_cache;
pMemoryRequirements->memoryRequirements.size = image->size;
pMemoryRequirements->memoryRequirements.alignment = image->alignment;
- vk_foreach_struct(ext, pMemoryRequirements->pNext)
- {
+ vk_foreach_struct (ext, pMemoryRequirements->pNext) {
switch (ext->sType) {
case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;
- req->requiresDedicatedAllocation =
- image->shareable && image->vk.tiling != VK_IMAGE_TILING_LINEAR;
+ req->requiresDedicatedAllocation = image->shareable && image->vk.tiling != VK_IMAGE_TILING_LINEAR;
req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
break;
}
}
VKAPI_ATTR void VKAPI_CALL
-radv_GetDeviceImageMemoryRequirements(VkDevice device,
- const VkDeviceImageMemoryRequirements *pInfo,
+radv_GetDeviceImageMemoryRequirements(VkDevice device, const VkDeviceImageMemoryRequirements *pInfo,
VkMemoryRequirements2 *pMemoryRequirements)
{
UNUSED VkResult result;
* creating an image.
* TODO: Avoid creating an image.
*/
- result = radv_image_create(
- device, &(struct radv_image_create_info){.vk_info = pInfo->pCreateInfo}, NULL, &image, true);
+ result =
+ radv_image_create(device, &(struct radv_image_create_info){.vk_info = pInfo->pCreateInfo}, NULL, &image, true);
assert(result == VK_SUCCESS);
VkImageMemoryRequirementsInfo2 info2 = {
}
VKAPI_ATTR VkResult VKAPI_CALL
-radv_BindImageMemory2(VkDevice _device, uint32_t bindInfoCount,
- const VkBindImageMemoryInfo *pBindInfos)
+radv_BindImageMemory2(VkDevice _device, uint32_t bindInfoCount, const VkBindImageMemoryInfo *pBindInfos)
{
RADV_FROM_HANDLE(radv_device, device, _device);
if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) {
struct radv_image *swapchain_img =
- radv_image_from_handle(wsi_common_get_image(
- swapchain_info->swapchain, swapchain_info->imageIndex));
+ radv_image_from_handle(wsi_common_get_image(swapchain_info->swapchain, swapchain_info->imageIndex));
image->bindings[0].bo = swapchain_img->bindings[0].bo;
image->bindings[0].offset = swapchain_img->bindings[0].offset;
radv_GetImageMemoryRequirements2(_device, &info, &reqs);
if (pBindInfos[i].memoryOffset + reqs.memoryRequirements.size > mem->alloc_size) {
- return vk_errorf(device, VK_ERROR_UNKNOWN,
- "Device memory object too small for the image.\n");
+ return vk_errorf(device, VK_ERROR_UNKNOWN, "Device memory object too small for the image.\n");
}
}
vk_find_struct_const(pBindInfos[i].pNext, BIND_IMAGE_PLANE_MEMORY_INFO);
switch (plane_info->planeAspect) {
- case VK_IMAGE_ASPECT_PLANE_0_BIT:
- image->bindings[0].bo = mem->bo;
- image->bindings[0].offset = pBindInfos[i].memoryOffset;
- break;
- case VK_IMAGE_ASPECT_PLANE_1_BIT:
- image->bindings[1].bo = mem->bo;
- image->bindings[1].offset = pBindInfos[i].memoryOffset;
- break;
- case VK_IMAGE_ASPECT_PLANE_2_BIT:
- image->bindings[2].bo = mem->bo;
- image->bindings[2].offset = pBindInfos[i].memoryOffset;
- break;
- default:
- break;
+ case VK_IMAGE_ASPECT_PLANE_0_BIT:
+ image->bindings[0].bo = mem->bo;
+ image->bindings[0].offset = pBindInfos[i].memoryOffset;
+ break;
+ case VK_IMAGE_ASPECT_PLANE_1_BIT:
+ image->bindings[1].bo = mem->bo;
+ image->bindings[1].offset = pBindInfos[i].memoryOffset;
+ break;
+ case VK_IMAGE_ASPECT_PLANE_2_BIT:
+ image->bindings[2].bo = mem->bo;
+ image->bindings[2].offset = pBindInfos[i].memoryOffset;
+ break;
+ default:
+ break;
}
} else {
image->bindings[0].bo = mem->bo;
}
static unsigned
-get_dcc_max_uncompressed_block_size(const struct radv_device *device,
- const struct radv_image_view *iview)
+get_dcc_max_uncompressed_block_size(const struct radv_device *device, const struct radv_image_view *iview)
{
if (device->physical_device->rad_info.gfx_level < GFX10 && iview->image->vk.samples > 1) {
if (iview->image->planes[0].surface.bpe == 1)
/* For GFX9+ ac_surface computes values for us (except min_compressed
* and max_uncompressed) */
if (device->physical_device->rad_info.gfx_level >= GFX9) {
- max_compressed_block_size =
- iview->image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size;
+ max_compressed_block_size = iview->image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size;
independent_128b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_128B_blocks;
independent_64b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_64B_blocks;
} else {
independent_128b_blocks = 0;
- if (iview->image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
- VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
+ if (iview->image->vk.usage &
+ (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
/* If this DCC image is potentially going to be used in texture
* fetches, we need some special settings.
*/
cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX6(desc->swizzle[3] == PIPE_SWIZZLE_1);
uint32_t plane_id = iview->image->disjoint ? iview->plane_id : 0;
- va = radv_buffer_get_va(iview->image->bindings[plane_id].bo) +
- iview->image->bindings[plane_id].offset;
+ va = radv_buffer_get_va(iview->image->bindings[plane_id].bo) + iview->image->bindings[plane_id].offset;
if (iview->nbc_view.valid) {
va += iview->nbc_view.base_address_offset;
cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
S_028C74_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
- S_028C74_RB_ALIGNED(meta.rb_aligned) |
- S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
+ S_028C74_RB_ALIGNED(meta.rb_aligned) | S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.epitch);
}
if (radv_image_has_fmask(iview->image)) {
if (device->physical_device->rad_info.gfx_level >= GFX7)
- cb->cb_color_pitch |=
- S_028C64_FMASK_TILE_MAX(surf->u.legacy.color.fmask.pitch_in_pixels / 8 - 1);
+ cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.color.fmask.pitch_in_pixels / 8 - 1);
cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.color.fmask.tiling_index);
cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.color.fmask.slice_tile_max);
} else {
va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset;
va += surf->meta_offset;
- if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) &&
- device->physical_device->rad_info.gfx_level <= GFX8)
+ if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && device->physical_device->rad_info.gfx_level <= GFX8)
va += plane->surface.u.legacy.color.dcc_level[iview->vk.base_mip_level].dcc_offset;
unsigned dcc_tile_swizzle = tile_swizzle;
if (device->physical_device->rad_info.gfx_level >= GFX11)
cb->cb_color_attrib |= S_028C74_NUM_FRAGMENTS_GFX11(log_samples);
else
- cb->cb_color_attrib |=
- S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS_GFX6(log_samples);
+ cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS_GFX6(log_samples);
}
if (radv_image_has_fmask(iview->image)) {
- va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset +
- surf->fmask_offset;
+ va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset + surf->fmask_offset;
cb->cb_color_fmask = va >> 8;
cb->cb_color_fmask |= surf->fmask_tile_swizzle;
} else {
endian = radv_colorformat_endian_swap(format);
/* blend clamp should be set for all NORM/SRGB types */
- if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM ||
- ntype == V_028C70_NUMBER_SRGB)
+ if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM || ntype == V_028C70_NUMBER_SRGB)
blend_clamp = 1;
/* set blend bypass according to docs if SINT/UINT or
8/24 COLOR variants */
- if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
- format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
- format == V_028C70_COLOR_X24_8_32_FLOAT) {
+ if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || format == V_028C70_COLOR_8_24 ||
+ format == V_028C70_COLOR_24_8 || format == V_028C70_COLOR_X24_8_32_FLOAT) {
blend_clamp = 0;
blend_bypass = 1;
}
format == V_028C70_COLOR_8_8_8_8))
->color_is_int8 = true;
#endif
- cb->cb_color_info =
- S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) |
- S_028C70_BLEND_BYPASS(blend_bypass) | S_028C70_SIMPLE_FLOAT(1) |
- S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM &&
- ntype != V_028C70_NUMBER_SRGB && format != V_028C70_COLOR_8_24 &&
- format != V_028C70_COLOR_24_8) |
- S_028C70_NUMBER_TYPE(ntype);
+ cb->cb_color_info = S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) |
+ S_028C70_BLEND_BYPASS(blend_bypass) | S_028C70_SIMPLE_FLOAT(1) |
+ S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM &&
+ ntype != V_028C70_NUMBER_SRGB && format != V_028C70_COLOR_8_24 &&
+ format != V_028C70_COLOR_24_8) |
+ S_028C70_NUMBER_TYPE(ntype);
if (device->physical_device->rad_info.gfx_level >= GFX11)
cb->cb_color_info |= S_028C70_FORMAT_GFX11(format);
}
}
- if (radv_image_has_cmask(iview->image) &&
- !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
+ if (radv_image_has_cmask(iview->image) && !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && !iview->disable_dcc_mrt &&
}
if (device->physical_device->rad_info.gfx_level >= GFX9) {
- unsigned mip0_depth = iview->image->vk.image_type == VK_IMAGE_TYPE_3D
- ? (iview->extent.depth - 1)
- : (iview->image->vk.array_layers - 1);
- unsigned width =
- vk_format_get_plane_width(iview->image->vk.format, iview->plane_id, iview->extent.width);
- unsigned height =
- vk_format_get_plane_height(iview->image->vk.format, iview->plane_id, iview->extent.height);
+ unsigned mip0_depth = iview->image->vk.image_type == VK_IMAGE_TYPE_3D ? (iview->extent.depth - 1)
+ : (iview->image->vk.array_layers - 1);
+ unsigned width = vk_format_get_plane_width(iview->image->vk.format, iview->plane_id, iview->extent.width);
+ unsigned height = vk_format_get_plane_height(iview->image->vk.format, iview->plane_id, iview->extent.height);
unsigned max_mip = iview->image->vk.mip_levels - 1;
if (device->physical_device->rad_info.gfx_level >= GFX10) {
cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(base_level);
- cb->cb_color_attrib3 |=
- S_028EE0_MIP0_DEPTH(mip0_depth) | S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
- S_028EE0_RESOURCE_LEVEL(device->physical_device->rad_info.gfx_level >= GFX11 ? 0 : 1);
+ cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) | S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
+ S_028EE0_RESOURCE_LEVEL(device->physical_device->rad_info.gfx_level >= GFX11 ? 0 : 1);
} else {
cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->vk.base_mip_level);
- cb->cb_color_attrib |=
- S_028C74_MIP0_DEPTH(mip0_depth) | S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
+ cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) | S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
}
/* GFX10.3+ can set a custom pitch for 1D and 2D non-array, but it must be a multiple
*
* We set the pitch in MIP0_WIDTH.
*/
- if (device->physical_device->rad_info.gfx_level >= GFX10_3 &&
- iview->image->vk.image_type == VK_IMAGE_TYPE_2D &&
- iview->image->vk.array_layers == 1 &&
- plane->surface.is_linear) {
+ if (device->physical_device->rad_info.gfx_level >= GFX10_3 && iview->image->vk.image_type == VK_IMAGE_TYPE_2D &&
+ iview->image->vk.array_layers == 1 && plane->surface.is_linear) {
assert((plane->surface.u.gfx9.surf_pitch * plane->surface.bpe) % 256 == 0);
width = plane->surface.u.gfx9.surf_pitch;
width *= 2;
}
- cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) | S_028C68_MIP0_HEIGHT(height - 1) |
- S_028C68_MAX_MIP(max_mip);
+ cb->cb_color_attrib2 =
+ S_028C68_MIP0_WIDTH(width - 1) | S_028C68_MIP0_HEIGHT(height - 1) | S_028C68_MAX_MIP(max_mip);
}
}
/* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */
if (device->physical_device->rad_info.has_two_planes_iterate256_bug &&
- radv_image_get_iterate256(device, iview->image) &&
- !radv_image_tile_stencil_disabled(device, iview->image) &&
+ radv_image_get_iterate256(device, iview->image) && !radv_image_tile_stencil_disabled(device, iview->image) &&
iview->image->vk.samples == 4) {
max_zplanes = 1;
}
}
void
-radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer,
- struct radv_ds_buffer_info *ds)
+radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer, struct radv_ds_buffer_info *ds)
{
const struct radeon_surf *surf = &image->planes[0].surface;
ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
- ds->db_z_info = S_028038_FORMAT(V_028040_Z_16) |
- S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) |
- S_028038_ZRANGE_PRECISION(1) |
- S_028038_TILE_SURFACE_ENABLE(1);
+ ds->db_z_info = S_028038_FORMAT(V_028040_Z_16) | S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) |
+ S_028038_ZRANGE_PRECISION(1) | S_028038_TILE_SURFACE_ENABLE(1);
ds->db_stencil_info = S_02803C_FORMAT(V_028044_STENCIL_INVALID);
- ds->db_depth_size = S_02801C_X_MAX(image->vk.extent.width - 1) |
- S_02801C_Y_MAX(image->vk.extent.height - 1);
+ ds->db_depth_size = S_02801C_X_MAX(image->vk.extent.width - 1) | S_02801C_Y_MAX(image->vk.extent.height - 1);
ds->db_htile_data_base = radv_buffer_get_va(htile_buffer->bo) >> 8;
- ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1) |
- S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING);
+ ds->db_htile_surface =
+ S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1) | S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING);
}
void
stencil_format = surf->has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
- ds->db_depth_view = S_028008_SLICE_START(iview->vk.base_array_layer) |
- S_028008_SLICE_MAX(max_slice);
+ ds->db_depth_view = S_028008_SLICE_START(iview->vk.base_array_layer) | S_028008_SLICE_MAX(max_slice);
if (device->physical_device->rad_info.gfx_level >= GFX10) {
- ds->db_depth_view |= S_028008_SLICE_START_HI(iview->vk.base_array_layer >> 11) |
- S_028008_SLICE_MAX_HI(max_slice >> 11);
+ ds->db_depth_view |=
+ S_028008_SLICE_START_HI(iview->vk.base_array_layer >> 11) | S_028008_SLICE_MAX_HI(max_slice >> 11);
}
ds->db_htile_data_base = 0;
assert(surf->u.gfx9.surf_offset == 0);
s_offs += surf->u.gfx9.zs.stencil_offset;
- ds->db_z_info = S_028038_FORMAT(format) |
- S_028038_NUM_SAMPLES(util_logbase2(iview->image->vk.samples)) |
- S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) |
- S_028038_MAXMIP(iview->image->vk.mip_levels - 1) |
+ ds->db_z_info = S_028038_FORMAT(format) | S_028038_NUM_SAMPLES(util_logbase2(iview->image->vk.samples)) |
+ S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) | S_028038_MAXMIP(iview->image->vk.mip_levels - 1) |
S_028038_ZRANGE_PRECISION(1) |
S_028040_ITERATE_256(device->physical_device->rad_info.gfx_level >= GFX11);
- ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
- S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode) |
+ ds->db_stencil_info = S_02803C_FORMAT(stencil_format) | S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode) |
S_028044_ITERATE_256(device->physical_device->rad_info.gfx_level >= GFX11);
if (device->physical_device->rad_info.gfx_level == GFX9) {
}
ds->db_depth_view |= S_028008_MIPID(level);
- ds->db_depth_size = S_02801C_X_MAX(iview->image->vk.extent.width - 1) |
- S_02801C_Y_MAX(iview->image->vk.extent.height - 1);
+ ds->db_depth_size =
+ S_02801C_X_MAX(iview->image->vk.extent.width - 1) | S_02801C_Y_MAX(iview->image->vk.extent.height - 1);
if (radv_htile_enabled(iview->image, level)) {
ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
}
- va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset +
- surf->meta_offset;
+ va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset + surf->meta_offset;
ds->db_htile_data_base = va >> 8;
ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1);
ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
}
- ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
- S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
- ds->db_depth_slice =
- S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
+ ds->db_depth_size =
+ S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) | S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
+ ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
if (radv_htile_enabled(iview->image, level)) {
ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
}
- va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset +
- surf->meta_offset;
+ va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset + surf->meta_offset;
ds->db_htile_data_base = va >> 8;
ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
}
void
-radv_gfx11_set_db_render_control(const struct radv_device *device, unsigned num_samples,
- unsigned *db_render_control)
+radv_gfx11_set_db_render_control(const struct radv_device *device, unsigned num_samples, unsigned *db_render_control)
{
const struct radv_physical_device *pdevice = device->physical_device;
unsigned max_allowed_tiles_in_wave = 0;
max_allowed_tiles_in_wave = 15;
}
- *db_render_control |= S_028000_OREO_MODE(V_028000_OMODE_O_THEN_B) |
- S_028000_MAX_ALLOWED_TILES_IN_WAVE(max_allowed_tiles_in_wave);
+ *db_render_control |=
+ S_028000_OREO_MODE(V_028000_OMODE_O_THEN_B) | S_028000_MAX_ALLOWED_TILES_IN_WAVE(max_allowed_tiles_in_wave);
}
VKAPI_ATTR VkResult VKAPI_CALL
}
static uint32_t
-radv_compute_valid_memory_types_attempt(struct radv_physical_device *dev,
- enum radeon_bo_domain domains, enum radeon_bo_flag flags,
- enum radeon_bo_flag ignore_flags)
+radv_compute_valid_memory_types_attempt(struct radv_physical_device *dev, enum radeon_bo_domain domains,
+ enum radeon_bo_flag flags, enum radeon_bo_flag ignore_flags)
{
/* Don't count GTT/CPU as relevant:
*
* - We're not fully consistent between the two.
* - Sometimes VRAM gets VRAM|GTT.
*/
- const enum radeon_bo_domain relevant_domains =
- RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA;
+ const enum radeon_bo_domain relevant_domains = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA;
uint32_t bits = 0;
for (unsigned i = 0; i < dev->memory_properties.memoryTypeCount; ++i) {
if ((domains & relevant_domains) != (dev->memory_domains[i] & relevant_domains))
return bits;
}
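For readers following the memory-type logic: the _attempt suffix implies a
retry wrapper that widens ignore_flags until some type matches. A sketch of
that shape (the choice and order of ignored flags here is an assumption; the
actual wrapper is elided from this hunk):

    static uint32_t
    compute_valid_memory_types(struct radv_physical_device *dev,
                               enum radeon_bo_domain domains, enum radeon_bo_flag flags)
    {
       uint32_t bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, 0);

       /* If no type matched exactly, retry while ignoring flags that are
        * preferences rather than hard requirements. */
       if (!bits)
          bits = radv_compute_valid_memory_types_attempt(dev, domains, flags,
                                                         RADEON_FLAG_GTT_WC);
       return bits;
    }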
VKAPI_ATTR VkResult VKAPI_CALL
-radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType,
- int fd, VkMemoryFdPropertiesKHR *pMemoryFdProperties)
+radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType, int fd,
+ VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
RADV_FROM_HANDLE(radv_device, device, _device);
if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags))
return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
- pMemoryFdProperties->memoryTypeBits =
- radv_compute_valid_memory_types(device->physical_device, domains, flags);
+ pMemoryFdProperties->memoryTypeBits = radv_compute_valid_memory_types(device->physical_device, domains, flags);
return VK_SUCCESS;
}
default:
#ifndef _WIN32
VKAPI_ATTR VkResult VKAPI_CALL
radv_GetCalibratedTimestampsEXT(VkDevice _device, uint32_t timestampCount,
- const VkCalibratedTimestampInfoEXT *pTimestampInfos,
- uint64_t *pTimestamps, uint64_t *pMaxDeviation)
+ const VkCalibratedTimestampInfoEXT *pTimestampInfos, uint64_t *pTimestamps,
+ uint64_t *pMaxDeviation)
{
RADV_FROM_HANDLE(radv_device, device, _device);
uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
#include "vk_common_entrypoints.h"
static void
-radv_get_sequence_size(const struct radv_indirect_command_layout *layout,
- const struct radv_graphics_pipeline *pipeline, uint32_t *cmd_size,
- uint32_t *upload_size)
+radv_get_sequence_size(const struct radv_indirect_command_layout *layout, const struct radv_graphics_pipeline *pipeline,
+ uint32_t *cmd_size, uint32_t *upload_size)
{
const struct radv_device *device = container_of(layout->base.device, struct radv_device, vk);
const struct radv_shader *vs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX);
if (layout->bind_vbo_mask) {
*upload_size += 16 * util_bitcount(vs->info.vs.vb_desc_usage_mask);
- /* One PKT3_SET_SH_REG for emitting VBO pointer (32-bit) */
+ /* One PKT3_SET_SH_REG for emitting VBO pointer (32-bit) */
*cmd_size += 3 * 4;
}
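The 3 * 4 bytes follow directly from the packet layout used later in this file
for the VBO pointer (see the pkt[3] arrays passed to dgc_emit): a one-dword SH
register write costs three dwords of command stream.

    /* PKT3_SET_SH_REG writing one 32-bit value:
     *   dw0: PKT3(PKT3_SET_SH_REG, 1, 0)  -- packet header
     *   dw1: SH register offset
     *   dw2: payload (the 32-bit VBO descriptor pointer)
     * => 3 dwords * 4 bytes = 12 bytes of cmd_size.
     */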
*cmd_size += (2 + locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].num_sgprs) * 4;
}
if (need_copy)
- *upload_size +=
- align(pipeline->base.push_constant_size + 16 * pipeline->base.dynamic_offset_count, 16);
+ *upload_size += align(pipeline->base.push_constant_size + 16 * pipeline->base.dynamic_offset_count, 16);
}
if (layout->binds_index_buffer) {
{
assert(value->bit_size >= 32);
nir_ssa_def *offset = nir_load_var(b, cs->offset);
- nir_store_ssbo(b, value, cs->descriptor, offset,.access = ACCESS_NON_READABLE);
+ nir_store_ssbo(b, value, cs->descriptor, offset, .access = ACCESS_NON_READABLE);
nir_store_var(b, cs->offset, nir_iadd_imm(b, offset, value->num_components * value->bit_size / 8), 0x1);
}
-#define load_param32(b, field) \
- nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0), \
- .base = offsetof(struct radv_dgc_params, field), .range = 4)
+#define load_param32(b, field) \
+ nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0), .base = offsetof(struct radv_dgc_params, field), .range = 4)
-#define load_param16(b, field) \
- nir_ubfe_imm( \
- (b), \
- nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0), \
- .base = (offsetof(struct radv_dgc_params, field) & ~3), .range = 4), \
- (offsetof(struct radv_dgc_params, field) & 2) * 8, 16)
+#define load_param16(b, field) \
+ nir_ubfe_imm((b), \
+ nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0), \
+ .base = (offsetof(struct radv_dgc_params, field) & ~3), .range = 4), \
+ (offsetof(struct radv_dgc_params, field) & 2) * 8, 16)
-#define load_param8(b, field) \
- nir_ubfe_imm( \
- (b), \
- nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0), \
- .base = (offsetof(struct radv_dgc_params, field) & ~3), .range = 4), \
- (offsetof(struct radv_dgc_params, field) & 3) * 8, 8)
+#define load_param8(b, field) \
+ nir_ubfe_imm((b), \
+ nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0), \
+ .base = (offsetof(struct radv_dgc_params, field) & ~3), .range = 4), \
+ (offsetof(struct radv_dgc_params, field) & 3) * 8, 8)
-
-#define load_param64(b, field) \
- nir_pack_64_2x32((b), nir_load_push_constant((b), 2, 32, nir_imm_int((b), 0), \
- .base = offsetof(struct radv_dgc_params, field), .range = 8))
+#define load_param64(b, field) \
+ nir_pack_64_2x32((b), nir_load_push_constant((b), 2, 32, nir_imm_int((b), 0), \
+ .base = offsetof(struct radv_dgc_params, field), .range = 8))
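These helpers load the aligned 32-bit push-constant dword that contains a
narrower field and extract it with ubfe. A CPU-side model of load_param16, for
illustration only (not driver code):

    #include <stdint.h>
    #include <string.h>

    /* Model of load_param16: .base rounds the field offset down to its
     * containing dword; the ubfe bit offset is 0 or 16 depending on which
     * half of that dword the field occupies. */
    static uint32_t
    load_param16_model(const uint8_t *push_constants, size_t field_offset)
    {
       uint32_t dword;
       memcpy(&dword, push_constants + (field_offset & ~(size_t)3), 4); /* .base */
       unsigned bit = (field_offset & 2) * 8; /* 0 or 16 */
       return (dword >> bit) & 0xffff;        /* ubfe(..., bit, 16) */
    }

For example, a 16-bit field at byte offset 6 loads the dword at offset 4 and
returns its upper half; load_param8 is the same idea with (offset & 3) * 8 and
an 8-bit mask.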
static nir_ssa_def *
nir_pkt3(nir_builder *b, unsigned op, nir_ssa_def *len)
}
static void
-dgc_emit_userdata_vertex(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *vtx_base_sgpr,
- nir_ssa_def *first_vertex, nir_ssa_def *first_instance, nir_ssa_def *drawid)
+dgc_emit_userdata_vertex(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *vtx_base_sgpr, nir_ssa_def *first_vertex,
+ nir_ssa_def *first_instance, nir_ssa_def *drawid)
{
vtx_base_sgpr = nir_u2u32(b, vtx_base_sgpr);
- nir_ssa_def *has_drawid =
- nir_test_mask(b, vtx_base_sgpr, DGC_USES_DRAWID);
- nir_ssa_def *has_baseinstance =
- nir_test_mask(b, vtx_base_sgpr, DGC_USES_BASEINSTANCE);
+ nir_ssa_def *has_drawid = nir_test_mask(b, vtx_base_sgpr, DGC_USES_DRAWID);
+ nir_ssa_def *has_baseinstance = nir_test_mask(b, vtx_base_sgpr, DGC_USES_BASEINSTANCE);
nir_ssa_def *pkt_cnt = nir_imm_int(b, 1);
pkt_cnt = nir_bcsel(b, has_drawid, nir_iadd_imm(b, pkt_cnt, 1), pkt_cnt);
nir_imm_int(b, PKT3_NOP_PAD), nir_imm_int(b, PKT3_NOP_PAD),
};
- values[3] = nir_bcsel(b, nir_ior(b, has_drawid, has_baseinstance),
- nir_bcsel(b, has_drawid, drawid, first_instance), values[4]);
+ values[3] = nir_bcsel(b, nir_ior(b, has_drawid, has_baseinstance), nir_bcsel(b, has_drawid, drawid, first_instance),
+ values[4]);
values[4] = nir_bcsel(b, nir_iand(b, has_drawid, has_baseinstance), first_instance, values[4]);
dgc_emit(b, cs, nir_vec(b, values, 5));
}
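From the two bcsels above, the user-SGPR payload after the base-vertex dword
works out as follows (unused tail slots stay PKT3_NOP_PAD so the emitted size
is uniform):

    /* drawid only:           [..., drawid,         NOP_PAD]
     * baseinstance only:     [..., first_instance, NOP_PAD]
     * drawid + baseinstance: [..., drawid,         first_instance]
     * neither:               [..., NOP_PAD,        NOP_PAD]
     */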
static void
-dgc_emit_draw_index_offset_2(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *index_offset,
- nir_ssa_def *index_count, nir_ssa_def *max_index_count)
+dgc_emit_draw_index_offset_2(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *index_offset, nir_ssa_def *index_count,
+ nir_ssa_def *max_index_count)
{
- nir_ssa_def *values[5] = {nir_imm_int(b, PKT3(PKT3_DRAW_INDEX_OFFSET_2, 3, false)),
- max_index_count, index_offset, index_count,
- nir_imm_int(b, V_0287F0_DI_SRC_SEL_DMA)};
+ nir_ssa_def *values[5] = {nir_imm_int(b, PKT3(PKT3_DRAW_INDEX_OFFSET_2, 3, false)), max_index_count, index_offset,
+ index_count, nir_imm_int(b, V_0287F0_DI_SRC_SEL_DMA)};
dgc_emit(b, cs, nir_vec(b, values, 5));
}
{
nir_ssa_def *cmd_buf_tail_start = nir_imul(b, cmd_buf_stride, sequence_count);
- nir_variable *offset =
- nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "offset");
+ nir_variable *offset = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "offset");
nir_store_var(b, offset, cmd_buf_tail_start, 0x1);
nir_ssa_def *dst_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PREPARE);
* Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV.
*/
static void
-dgc_emit_draw(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf,
- nir_ssa_def *stream_base, nir_ssa_def *draw_params_offset, nir_ssa_def *sequence_id)
+dgc_emit_draw(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base,
+ nir_ssa_def *draw_params_offset, nir_ssa_def *sequence_id)
{
nir_ssa_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr);
nir_ssa_def *stream_offset = nir_iadd(b, draw_params_offset, stream_base);
* Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV.
*/
static void
-dgc_emit_draw_indexed(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf,
- nir_ssa_def *stream_base, nir_ssa_def *draw_params_offset,
- nir_ssa_def *sequence_id, nir_ssa_def *max_index_count)
+dgc_emit_draw_indexed(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base,
+ nir_ssa_def *draw_params_offset, nir_ssa_def *sequence_id, nir_ssa_def *max_index_count)
{
nir_ssa_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr);
nir_ssa_def *stream_offset = nir_iadd(b, draw_params_offset, stream_base);
nir_ssa_def *draw_data0 = nir_load_ssbo(b, 4, 32, stream_buf, stream_offset);
- nir_ssa_def *draw_data1 =
- nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd_imm(b, stream_offset, 16));
+ nir_ssa_def *draw_data1 = nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd_imm(b, stream_offset, 16));
nir_ssa_def *index_count = nir_channel(b, draw_data0, 0);
nir_ssa_def *instance_count = nir_channel(b, draw_data0, 1);
nir_ssa_def *first_index = nir_channel(b, draw_data0, 2);
nir_pop_if(b, 0);
}
-
/**
* Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV.
*/
static void
-dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf,
- nir_ssa_def *stream_base, nir_ssa_def *index_buffer_offset,
- nir_ssa_def *ibo_type_32, nir_ssa_def *ibo_type_8,
- nir_variable *index_size_var, nir_variable *max_index_count_var,
- const struct radv_device *device)
+dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base,
+ nir_ssa_def *index_buffer_offset, nir_ssa_def *ibo_type_32, nir_ssa_def *ibo_type_8,
+ nir_variable *index_size_var, nir_variable *max_index_count_var, const struct radv_device *device)
{
- nir_ssa_def *index_stream_offset =
- nir_iadd(b, index_buffer_offset, stream_base);
- nir_ssa_def *data =
- nir_load_ssbo(b, 4, 32, stream_buf, index_stream_offset);
+ nir_ssa_def *index_stream_offset = nir_iadd(b, index_buffer_offset, stream_base);
+ nir_ssa_def *data = nir_load_ssbo(b, 4, 32, stream_buf, index_stream_offset);
nir_ssa_def *vk_index_type = nir_channel(b, data, 3);
- nir_ssa_def *index_type = nir_bcsel(
- b, nir_ieq(b, vk_index_type, ibo_type_32),
- nir_imm_int(b, V_028A7C_VGT_INDEX_32), nir_imm_int(b, V_028A7C_VGT_INDEX_16));
- index_type = nir_bcsel(b, nir_ieq(b, vk_index_type, ibo_type_8),
- nir_imm_int(b, V_028A7C_VGT_INDEX_8), index_type);
-
- nir_ssa_def *index_size = nir_iand_imm(
- b, nir_ushr(b, nir_imm_int(b, 0x142), nir_imul_imm(b, index_type, 4)), 0xf);
+ nir_ssa_def *index_type = nir_bcsel(b, nir_ieq(b, vk_index_type, ibo_type_32), nir_imm_int(b, V_028A7C_VGT_INDEX_32),
+ nir_imm_int(b, V_028A7C_VGT_INDEX_16));
+ index_type = nir_bcsel(b, nir_ieq(b, vk_index_type, ibo_type_8), nir_imm_int(b, V_028A7C_VGT_INDEX_8), index_type);
+
+ nir_ssa_def *index_size = nir_iand_imm(b, nir_ushr(b, nir_imm_int(b, 0x142), nir_imul_imm(b, index_type, 4)), 0xf);
nir_store_var(b, index_size_var, index_size, 0x1);
nir_ssa_def *max_index_count = nir_udiv(b, nir_channel(b, data, 2), index_size);
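The 0x142 immediate is a nibble-packed lookup table from VGT index type to
index size in bytes, indexed by shifting four bits per type:

    /* 0x142 = nibbles {2, 4, 1} for types {0, 1, 2}:
     *   V_028A7C_VGT_INDEX_16 (0): (0x142 >> 0) & 0xf = 2 bytes
     *   V_028A7C_VGT_INDEX_32 (1): (0x142 >> 4) & 0xf = 4 bytes
     *   V_028A7C_VGT_INDEX_8  (2): (0x142 >> 8) & 0xf = 1 byte
     */

which is exactly what the nir_ushr/nir_iand_imm pair computes, and why
max_index_count just above divides the buffer byte size by index_size.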
if (device->physical_device->rad_info.gfx_level >= GFX9) {
unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
if (device->physical_device->rad_info.gfx_level < GFX9 ||
- (device->physical_device->rad_info.gfx_level == GFX9 &&
- device->physical_device->rad_info.me_fw_version < 26))
+ (device->physical_device->rad_info.gfx_level == GFX9 && device->physical_device->rad_info.me_fw_version < 26))
opcode = PKT3_SET_UCONFIG_REG;
cmd_values[0] = nir_imm_int(b, PKT3(opcode, 1, 0));
- cmd_values[1] = nir_imm_int(
- b, (R_03090C_VGT_INDEX_TYPE - CIK_UCONFIG_REG_OFFSET) >> 2 | (2u << 28));
+ cmd_values[1] = nir_imm_int(b, (R_03090C_VGT_INDEX_TYPE - CIK_UCONFIG_REG_OFFSET) >> 2 | (2u << 28));
cmd_values[2] = index_type;
} else {
cmd_values[0] = nir_imm_int(b, PKT3(PKT3_INDEX_TYPE, 0, 0));
* Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_STATE_FLAGS_NV.
*/
static void
-dgc_emit_state(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf,
- nir_ssa_def *stream_base, nir_ssa_def *state_offset)
+dgc_emit_state(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base,
+ nir_ssa_def *state_offset)
{
nir_ssa_def *stream_offset = nir_iadd(b, state_offset, stream_base);
nir_ssa_def *state = nir_load_ssbo(b, 1, 32, stream_buf, stream_offset);
state = nir_iand_imm(b, state, 1);
- nir_ssa_def *reg =
- nir_ior(b, load_param32(b, pa_su_sc_mode_cntl_base), nir_ishl_imm(b, state, 2));
+ nir_ssa_def *reg = nir_ior(b, load_param32(b, pa_su_sc_mode_cntl_base), nir_ishl_imm(b, state, 2));
- nir_ssa_def *cmd_values[3] = {
- nir_imm_int(b, PKT3(PKT3_SET_CONTEXT_REG, 1, 0)),
- nir_imm_int(b, (R_028814_PA_SU_SC_MODE_CNTL - SI_CONTEXT_REG_OFFSET) >> 2), reg};
+ nir_ssa_def *cmd_values[3] = {nir_imm_int(b, PKT3(PKT3_SET_CONTEXT_REG, 1, 0)),
+ nir_imm_int(b, (R_028814_PA_SU_SC_MODE_CNTL - SI_CONTEXT_REG_OFFSET) >> 2), reg};
dgc_emit(b, cs, nir_vec(b, cmd_values, 3));
nir_push_if(b, nir_ine_imm(b, scissor_count, 0));
{
nir_ssa_def *scissor_offset = load_param16(b, scissor_offset);
- nir_variable *idx =
- nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "scissor_copy_idx");
+ nir_variable *idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "scissor_copy_idx");
nir_store_var(b, idx, nir_imm_int(b, 0), 1);
nir_push_loop(b);
* Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV.
*/
static void
-dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf,
- nir_ssa_def *stream_base, nir_ssa_def *push_const_mask,
- nir_variable *upload_offset)
+dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base,
+ nir_ssa_def *push_const_mask, nir_variable *upload_offset)
{
nir_ssa_def *vbo_cnt = load_param8(b, vbo_cnt);
nir_ssa_def *const_copy = nir_ine_imm(b, load_param8(b, const_copy), 0);
nir_ssa_def *const_copy_words = nir_ushr_imm(b, const_copy_size, 2);
const_copy_words = nir_bcsel(b, const_copy, const_copy_words, nir_imm_int(b, 0));
- nir_variable *idx =
- nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "const_copy_idx");
+ nir_variable *idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "const_copy_idx");
nir_store_var(b, idx, nir_imm_int(b, 0), 0x1);
nir_ssa_def *param_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PARAMS);
}
nir_pop_if(b, NULL);
- nir_variable *data =
- nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "copy_data");
+ nir_variable *data = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "copy_data");
nir_ssa_def *update = nir_iand(b, push_const_mask, nir_ishl(b, nir_imm_int64(b, 1), cur_idx));
- update = nir_bcsel(b, nir_ult_imm(b, cur_idx, 64 /* bits in push_const_mask */), update,
- nir_imm_int64(b, 0));
+ update = nir_bcsel(b, nir_ult_imm(b, cur_idx, 64 /* bits in push_const_mask */), update, nir_imm_int64(b, 0));
nir_push_if(b, nir_ine_imm(b, update, 0));
{
- nir_ssa_def *stream_offset = nir_load_ssbo(
- b, 1, 32, param_buf, nir_iadd(b, param_offset_offset, nir_ishl_imm(b, cur_idx, 2)));
- nir_ssa_def *new_data =
- nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd(b, stream_base, stream_offset));
+ nir_ssa_def *stream_offset =
+ nir_load_ssbo(b, 1, 32, param_buf, nir_iadd(b, param_offset_offset, nir_ishl_imm(b, cur_idx, 2)));
+ nir_ssa_def *new_data = nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd(b, stream_base, stream_offset));
nir_store_var(b, data, new_data, 0x1);
}
nir_push_else(b, NULL);
{
nir_store_var(b, data,
- nir_load_ssbo(b, 1, 32, param_buf,
- nir_iadd(b, param_const_offset, nir_ishl_imm(b, cur_idx, 2))),
+ nir_load_ssbo(b, 1, 32, param_buf, nir_iadd(b, param_const_offset, nir_ishl_imm(b, cur_idx, 2))),
0x1);
}
nir_pop_if(b, NULL);
}
nir_pop_loop(b, NULL);
- nir_variable *shader_idx =
- nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "shader_idx");
+ nir_variable *shader_idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "shader_idx");
nir_store_var(b, shader_idx, nir_imm_int(b, 0), 0x1);
nir_ssa_def *shader_cnt = load_param16(b, push_constant_shader_cnt);
}
nir_pop_if(b, NULL);
- nir_ssa_def *reg_info = nir_load_ssbo(
- b, 3, 32, param_buf, nir_iadd(b, param_offset, nir_imul_imm(b, cur_shader_idx, 12)));
+ nir_ssa_def *reg_info =
+ nir_load_ssbo(b, 3, 32, param_buf, nir_iadd(b, param_offset, nir_imul_imm(b, cur_shader_idx, 12)));
nir_ssa_def *upload_sgpr = nir_ubfe_imm(b, nir_channel(b, reg_info, 0), 0, 16);
nir_ssa_def *inline_sgpr = nir_ubfe_imm(b, nir_channel(b, reg_info, 0), 16, 16);
nir_ssa_def *inline_mask = nir_pack_64_2x32(b, nir_channels(b, reg_info, 0x6));
nir_push_if(b, nir_ine_imm(b, upload_sgpr, 0));
{
- nir_ssa_def *pkt[3] = {
- nir_imm_int(b, PKT3(PKT3_SET_SH_REG, 1, 0)), upload_sgpr,
- nir_iadd(b, load_param32(b, upload_addr), nir_load_var(b, upload_offset))};
+ nir_ssa_def *pkt[3] = {nir_imm_int(b, PKT3(PKT3_SET_SH_REG, 1, 0)), upload_sgpr,
+ nir_iadd(b, load_param32(b, upload_addr), nir_load_var(b, upload_offset))};
dgc_emit(b, cs, nir_vec(b, pkt, 3));
}
}
nir_pop_if(b, NULL);
- nir_variable *data =
- nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "copy_data");
+ nir_variable *data = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "copy_data");
- nir_ssa_def *update =
- nir_iand(b, push_const_mask, nir_ishl(b, nir_imm_int64(b, 1), cur_idx));
- update = nir_bcsel(b, nir_ult_imm(b, cur_idx, 64 /* bits in push_const_mask */), update,
- nir_imm_int64(b, 0));
+ nir_ssa_def *update = nir_iand(b, push_const_mask, nir_ishl(b, nir_imm_int64(b, 1), cur_idx));
+ update =
+ nir_bcsel(b, nir_ult_imm(b, cur_idx, 64 /* bits in push_const_mask */), update, nir_imm_int64(b, 0));
nir_push_if(b, nir_ine_imm(b, update, 0));
{
nir_ssa_def *stream_offset =
- nir_load_ssbo(b, 1, 32, param_buf,
- nir_iadd(b, param_offset_offset, nir_ishl_imm(b, cur_idx, 2)));
- nir_ssa_def *new_data =
- nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd(b, stream_base, stream_offset));
+ nir_load_ssbo(b, 1, 32, param_buf, nir_iadd(b, param_offset_offset, nir_ishl_imm(b, cur_idx, 2)));
+ nir_ssa_def *new_data = nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd(b, stream_base, stream_offset));
nir_store_var(b, data, new_data, 0x1);
}
nir_push_else(b, NULL);
{
nir_store_var(
b, data,
- nir_load_ssbo(b, 1, 32, param_buf,
- nir_iadd(b, param_const_offset, nir_ishl_imm(b, cur_idx, 2))),
+ nir_load_ssbo(b, 1, 32, param_buf, nir_iadd(b, param_const_offset, nir_ishl_imm(b, cur_idx, 2))),
0x1);
}
nir_pop_if(b, NULL);
* For emitting VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV.
*/
static void
-dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf,
- nir_ssa_def *stream_base, nir_ssa_def *vbo_bind_mask,
- nir_variable *upload_offset, const struct radv_device *device)
+dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base,
+ nir_ssa_def *vbo_bind_mask, nir_variable *upload_offset, const struct radv_device *device)
{
nir_ssa_def *vbo_cnt = load_param8(b, vbo_cnt);
- nir_variable *vbo_idx =
- nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "vbo_idx");
+ nir_variable *vbo_idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "vbo_idx");
nir_store_var(b, vbo_idx, nir_imm_int(b, 0), 0x1);
nir_push_loop(b);
nir_pop_if(b, NULL);
nir_ssa_def *vbo_offset = nir_imul_imm(b, nir_load_var(b, vbo_idx), 16);
- nir_variable *vbo_data =
- nir_variable_create(b->shader, nir_var_shader_temp, glsl_uvec4_type(), "vbo_data");
+ nir_variable *vbo_data = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uvec4_type(), "vbo_data");
nir_ssa_def *param_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PARAMS);
nir_store_var(b, vbo_data, nir_load_ssbo(b, 4, 32, param_buf, vbo_offset), 0xf);
- nir_ssa_def *vbo_override = nir_ine_imm(
- b, nir_iand(b, vbo_bind_mask, nir_ishl(b, nir_imm_int(b, 1), nir_load_var(b, vbo_idx))),
- 0);
+ nir_ssa_def *vbo_override =
+ nir_ine_imm(b, nir_iand(b, vbo_bind_mask, nir_ishl(b, nir_imm_int(b, 1), nir_load_var(b, vbo_idx))), 0);
nir_push_if(b, vbo_override);
{
nir_ssa_def *vbo_offset_offset =
nir_ssa_def *size = nir_channel(b, stream_data, 2);
nir_ssa_def *stride = nir_channel(b, stream_data, 3);
- nir_ssa_def *dyn_stride =
- nir_test_mask(b, nir_channel(b, vbo_over_data, 0), DGC_DYNAMIC_STRIDE);
- nir_ssa_def *old_stride =
- nir_ubfe_imm(b, nir_channel(b, nir_load_var(b, vbo_data), 1), 16, 14);
+ nir_ssa_def *dyn_stride = nir_test_mask(b, nir_channel(b, vbo_over_data, 0), DGC_DYNAMIC_STRIDE);
+ nir_ssa_def *old_stride = nir_ubfe_imm(b, nir_channel(b, nir_load_var(b, vbo_data), 1), 16, 14);
stride = nir_bcsel(b, dyn_stride, stride, old_stride);
- nir_ssa_def *use_per_attribute_vb_descs =
- nir_test_mask(b, nir_channel(b, vbo_over_data, 0), 1u << 31);
+ nir_ssa_def *use_per_attribute_vb_descs = nir_test_mask(b, nir_channel(b, vbo_over_data, 0), 1u << 31);
nir_variable *num_records =
nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "num_records");
nir_store_var(b, num_records, size, 0x1);
nir_push_if(b, use_per_attribute_vb_descs);
{
nir_ssa_def *attrib_end = nir_ubfe_imm(b, nir_channel(b, vbo_over_data, 1), 16, 16);
- nir_ssa_def *attrib_index_offset =
- nir_ubfe_imm(b, nir_channel(b, vbo_over_data, 1), 0, 16);
+ nir_ssa_def *attrib_index_offset = nir_ubfe_imm(b, nir_channel(b, vbo_over_data, 1), 0, 16);
nir_push_if(b, nir_ult(b, nir_load_var(b, num_records), attrib_end));
{
nir_push_else(b, NULL);
{
nir_ssa_def *r = nir_iadd(
- b,
- nir_iadd_imm(
- b, nir_udiv(b, nir_isub(b, nir_load_var(b, num_records), attrib_end), stride),
- 1),
+ b, nir_iadd_imm(b, nir_udiv(b, nir_isub(b, nir_load_var(b, num_records), attrib_end), stride), 1),
attrib_index_offset);
nir_store_var(b, num_records, r, 0x1);
}
convert_cond = nir_iand(b, convert_cond, nir_ieq_imm(b, stride, 0));
nir_ssa_def *new_records =
- nir_iadd(b, nir_imul(b, nir_iadd_imm(b, nir_load_var(b, num_records), -1), stride),
- attrib_end);
+ nir_iadd(b, nir_imul(b, nir_iadd_imm(b, nir_load_var(b, num_records), -1), stride), attrib_end);
new_records = nir_bcsel(b, convert_cond, new_records, nir_load_var(b, num_records));
nir_store_var(b, num_records, new_records, 0x1);
}
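Worked example for the per-attribute path: with stride = 16, attrib_end = 12
(attribute offset plus format size) and a 140-byte binding,
(140 - 12) / 16 + 1 = 9, i.e. vertices 0..8 are fetchable since vertex i
touches bytes [16i, 16i + 12); attrib_index_offset then shifts that count into
index space. In the stride == 0 branch the element count collapses back to a
byte bound, since (num_records - 1) * 0 + attrib_end = attrib_end.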
if (device->physical_device->rad_info.gfx_level != GFX8) {
nir_push_if(b, nir_ine_imm(b, stride, 0));
{
- nir_ssa_def *r =
- nir_iadd(b, nir_load_var(b, num_records), nir_iadd_imm(b, stride, -1));
+ nir_ssa_def *r = nir_iadd(b, nir_load_var(b, num_records), nir_iadd_imm(b, stride, -1));
nir_store_var(b, num_records, nir_udiv(b, r, stride), 0x1);
}
nir_pop_if(b, NULL);
nir_ssa_def *rsrc_word3 = nir_channel(b, nir_load_var(b, vbo_data), 3);
if (device->physical_device->rad_info.gfx_level >= GFX10) {
- nir_ssa_def *oob_select =
- nir_bcsel(b, nir_ieq_imm(b, stride, 0), nir_imm_int(b, V_008F0C_OOB_SELECT_RAW),
- nir_imm_int(b, V_008F0C_OOB_SELECT_STRUCTURED));
+ nir_ssa_def *oob_select = nir_bcsel(b, nir_ieq_imm(b, stride, 0), nir_imm_int(b, V_008F0C_OOB_SELECT_RAW),
+ nir_imm_int(b, V_008F0C_OOB_SELECT_STRUCTURED));
rsrc_word3 = nir_iand_imm(b, rsrc_word3, C_008F0C_OOB_SELECT);
rsrc_word3 = nir_ior(b, rsrc_word3, nir_ishl_imm(b, oob_select, 28));
}
nir_ssa_def *va_hi = nir_iand_imm(b, nir_unpack_64_2x32_split_y(b, va), 0xFFFF);
stride = nir_iand_imm(b, stride, 0x3FFF);
nir_ssa_def *new_vbo_data[4] = {nir_unpack_64_2x32_split_x(b, va),
- nir_ior(b, nir_ishl_imm(b, stride, 16), va_hi),
- nir_load_var(b, num_records), rsrc_word3};
+ nir_ior(b, nir_ishl_imm(b, stride, 16), va_hi), nir_load_var(b, num_records),
+ rsrc_word3};
nir_store_var(b, vbo_data, nir_vec(b, new_vbo_data, 4), 0xf);
}
nir_pop_if(b, NULL);
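The rebuilt vbo_data follows the standard GFX buffer-resource dword layout,
which is why stride is masked to 14 bits (0x3FFF) before being merged:

    /* dword0: VA[31:0]
     * dword1: VA[47:32] | stride[13:0] << 16
     * dword2: num_records
     * dword3: dst_sel/format/oob_select word (rsrc_word3)
     */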
*/
nir_ssa_def *num_records = nir_channel(b, nir_load_var(b, vbo_data), 2);
nir_ssa_def *buf_va =
- nir_iand_imm(b, nir_pack_64_2x32(b, nir_trim_vector(b, nir_load_var(b, vbo_data), 2)),
- (1ull << 48) - 1ull);
+ nir_iand_imm(b, nir_pack_64_2x32(b, nir_trim_vector(b, nir_load_var(b, vbo_data), 2)), (1ull << 48) - 1ull);
nir_push_if(b, nir_ior(b, nir_ieq_imm(b, num_records, 0), nir_ieq_imm(b, buf_va, 0)));
{
- nir_ssa_def *new_vbo_data[4] = {nir_imm_int(b, 0), nir_imm_int(b, 0), nir_imm_int(b, 0),
- nir_imm_int(b, 0)};
+ nir_ssa_def *new_vbo_data[4] = {nir_imm_int(b, 0), nir_imm_int(b, 0), nir_imm_int(b, 0), nir_imm_int(b, 0)};
nir_store_var(b, vbo_data, nir_vec(b, new_vbo_data, 4), 0xf);
}
nir_pop_if(b, NULL);
nir_ssa_def *upload_off = nir_iadd(b, nir_load_var(b, upload_offset), vbo_offset);
- nir_store_ssbo(b, nir_load_var(b, vbo_data), cs->descriptor, upload_off,
- .access = ACCESS_NON_READABLE);
+ nir_store_ssbo(b, nir_load_var(b, vbo_data), cs->descriptor, upload_off, .access = ACCESS_NON_READABLE);
nir_store_var(b, vbo_idx, nir_iadd_imm(b, nir_load_var(b, vbo_idx), 1), 0x1);
}
nir_pop_loop(b, NULL);
- nir_ssa_def *packet[3] = {
- nir_imm_int(b, PKT3(PKT3_SET_SH_REG, 1, 0)), load_param16(b, vbo_reg),
- nir_iadd(b, load_param32(b, upload_addr), nir_load_var(b, upload_offset))};
+ nir_ssa_def *packet[3] = {nir_imm_int(b, PKT3(PKT3_SET_SH_REG, 1, 0)), load_param16(b, vbo_reg),
+ nir_iadd(b, load_param32(b, upload_addr), nir_load_var(b, upload_offset))};
dgc_emit(b, cs, nir_vec(b, packet, 3));
- nir_store_var(b, upload_offset,
- nir_iadd(b, nir_load_var(b, upload_offset), nir_imul_imm(b, vbo_cnt, 16)), 0x1);
+ nir_store_var(b, upload_offset, nir_iadd(b, nir_load_var(b, upload_offset), nir_imul_imm(b, vbo_cnt, 16)), 0x1);
}
static nir_shader *
nir_variable *upload_offset =
nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "upload_offset");
- nir_store_var(&b, upload_offset,
- nir_iadd(&b, load_param32(&b, cmd_buf_size),
- nir_imul(&b, load_param32(&b, upload_stride), sequence_id)),
- 0x1);
+ nir_store_var(
+ &b, upload_offset,
+ nir_iadd(&b, load_param32(&b, cmd_buf_size), nir_imul(&b, load_param32(&b, upload_stride), sequence_id)), 0x1);
nir_ssa_def *vbo_bind_mask = load_param32(&b, vbo_bind_mask);
nir_push_if(&b, nir_ine_imm(&b, vbo_bind_mask, 0));
{
- dgc_emit_vertex_buffer(&b, &cmd_buf, stream_buf, stream_base, vbo_bind_mask, upload_offset,
- dev);
+ dgc_emit_vertex_buffer(&b, &cmd_buf, stream_buf, stream_base, vbo_bind_mask, upload_offset, dev);
}
nir_pop_if(&b, NULL);
-
nir_ssa_def *push_const_mask = load_param64(&b, push_constant_mask);
nir_push_if(&b, nir_ine_imm(&b, push_const_mask, 0));
{
- dgc_emit_push_constant(&b, &cmd_buf, stream_buf, stream_base, push_const_mask,
- upload_offset);
+ dgc_emit_push_constant(&b, &cmd_buf, stream_buf, stream_base, push_const_mask, upload_offset);
}
nir_pop_if(&b, 0);
nir_push_if(&b, nir_ieq_imm(&b, load_param16(&b, draw_indexed), 0));
{
- dgc_emit_draw(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, draw_params_offset),
- sequence_id);
+ dgc_emit_draw(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, draw_params_offset), sequence_id);
}
nir_push_else(&b, NULL);
{
nir_ssa_def *bind_index_buffer = nir_ieq_imm(&b, nir_load_var(&b, index_size_var), 0);
nir_push_if(&b, bind_index_buffer);
{
- dgc_emit_index_buffer(&b, &cmd_buf, stream_buf, stream_base,
- load_param16(&b, index_buffer_offset),
- load_param32(&b, ibo_type_32), load_param32(&b, ibo_type_8),
- index_size_var, max_index_count_var, dev);
+ dgc_emit_index_buffer(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, index_buffer_offset),
+ load_param32(&b, ibo_type_32), load_param32(&b, ibo_type_8), index_size_var,
+ max_index_count_var, dev);
}
nir_pop_if(&b, NULL);
nir_ssa_def *index_size = nir_load_var(&b, index_size_var);
nir_ssa_def *max_index_count = nir_load_var(&b, max_index_count_var);
- index_size =
- nir_bcsel(&b, bind_index_buffer, nir_load_var(&b, index_size_var), index_size);
- max_index_count = nir_bcsel(&b, bind_index_buffer, nir_load_var(&b, max_index_count_var),
- max_index_count);
+ index_size = nir_bcsel(&b, bind_index_buffer, nir_load_var(&b, index_size_var), index_size);
+ max_index_count = nir_bcsel(&b, bind_index_buffer, nir_load_var(&b, max_index_count_var), max_index_count);
- dgc_emit_draw_indexed(&b, &cmd_buf, stream_buf, stream_base,
- load_param16(&b, draw_params_offset), sequence_id, max_index_count);
+ dgc_emit_draw_indexed(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, draw_params_offset), sequence_id,
+ max_index_count);
}
nir_pop_if(&b, NULL);
{
radv_DestroyPipeline(radv_device_to_handle(device), device->meta_state.dgc_prepare.pipeline,
&device->meta_state.alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- device->meta_state.dgc_prepare.p_layout, &device->meta_state.alloc);
- device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
- device->meta_state.dgc_prepare.ds_layout,
- &device->meta_state.alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), device->meta_state.dgc_prepare.p_layout,
+ &device->meta_state.alloc);
+ device->vk.dispatch_table.DestroyDescriptorSetLayout(
+ radv_device_to_handle(device), device->meta_state.dgc_prepare.ds_layout, &device->meta_state.alloc);
}
VkResult
VkResult result;
nir_shader *cs = build_dgc_prepare_shader(device);
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = DGC_NUM_DESCS,
- .pBindings = (VkDescriptorSetLayoutBinding[]){
- {.binding = DGC_DESC_STREAM,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- {.binding = DGC_DESC_PREPARE,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- {.binding = DGC_DESC_PARAMS,
- .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- {.binding = DGC_DESC_COUNT,
- .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL},
- }};
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
- &device->meta_state.alloc,
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = DGC_NUM_DESCS,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = DGC_DESC_STREAM,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = DGC_DESC_PREPARE,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = DGC_DESC_PARAMS,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = DGC_DESC_COUNT,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
&device->meta_state.dgc_prepare.ds_layout);
if (result != VK_SUCCESS)
goto cleanup;
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.dgc_prepare.ds_layout,
.pushConstantRangeCount = 1,
- .pPushConstantRanges =
- &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct radv_dgc_params)},
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct radv_dgc_params)},
};
- result = radv_CreatePipelineLayout(radv_device_to_handle(device), &leaf_pl_create_info,
- &device->meta_state.alloc,
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &leaf_pl_create_info, &device->meta_state.alloc,
&device->meta_state.dgc_prepare.p_layout);
if (result != VK_SUCCESS)
goto cleanup;
.layout = device->meta_state.dgc_prepare.p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &pipeline_info, &device->meta_state.alloc,
- &device->meta_state.dgc_prepare.pipeline);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &pipeline_info,
+ &device->meta_state.alloc, &device->meta_state.dgc_prepare.pipeline);
if (result != VK_SUCCESS)
goto cleanup;
}
VKAPI_ATTR VkResult VKAPI_CALL
-radv_CreateIndirectCommandsLayoutNV(VkDevice _device,
- const VkIndirectCommandsLayoutCreateInfoNV *pCreateInfo,
+radv_CreateIndirectCommandsLayoutNV(VkDevice _device, const VkIndirectCommandsLayoutCreateInfoNV *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkIndirectCommandsLayoutNV *pIndirectCommandsLayout)
{
RADV_FROM_HANDLE(radv_device, device, _device);
struct radv_indirect_command_layout *layout;
- size_t size =
- sizeof(*layout) + pCreateInfo->tokenCount * sizeof(VkIndirectCommandsLayoutTokenNV);
+ size_t size = sizeof(*layout) + pCreateInfo->tokenCount * sizeof(VkIndirectCommandsLayoutTokenNV);
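/* The token descriptions are stored inline after the struct, hence the combined allocation size. */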
- layout =
- vk_zalloc2(&device->vk.alloc, pAllocator, size, alignof(struct radv_indirect_command_layout),
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ layout = vk_zalloc2(&device->vk.alloc, pAllocator, size, alignof(struct radv_indirect_command_layout),
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!layout)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
break;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV:
layout->bind_vbo_mask |= 1u << pCreateInfo->pTokens[i].vertexBindingUnit;
- layout->vbo_offsets[pCreateInfo->pTokens[i].vertexBindingUnit] =
- pCreateInfo->pTokens[i].offset;
+ layout->vbo_offsets[pCreateInfo->pTokens[i].vertexBindingUnit] = pCreateInfo->pTokens[i].offset;
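/* For dynamically-strided bindings, DGC_DYNAMIC_STRIDE is assumed to occupy a spare bit of the
 * stored offset so both values travel in a single field. */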
if (pCreateInfo->pTokens[i].vertexDynamicStride)
layout->vbo_offsets[pCreateInfo->pTokens[i].vertexBindingUnit] |= DGC_DYNAMIC_STRIDE;
break;
}
VKAPI_ATTR void VKAPI_CALL
-radv_DestroyIndirectCommandsLayoutNV(VkDevice _device,
- VkIndirectCommandsLayoutNV indirectCommandsLayout,
+radv_DestroyIndirectCommandsLayoutNV(VkDevice _device, VkIndirectCommandsLayoutNV indirectCommandsLayout,
const VkAllocationCallbacks *pAllocator)
{
RADV_FROM_HANDLE(radv_device, device, _device);
}
VKAPI_ATTR void VKAPI_CALL
-radv_GetGeneratedCommandsMemoryRequirementsNV(
- VkDevice _device, const VkGeneratedCommandsMemoryRequirementsInfoNV *pInfo,
- VkMemoryRequirements2 *pMemoryRequirements)
+radv_GetGeneratedCommandsMemoryRequirementsNV(VkDevice _device,
+ const VkGeneratedCommandsMemoryRequirementsInfoNV *pInfo,
+ VkMemoryRequirements2 *pMemoryRequirements)
{
RADV_FROM_HANDLE(radv_device, device, _device);
VK_FROM_HANDLE(radv_indirect_command_layout, layout, pInfo->indirectCommandsLayout);
VkDeviceSize cmd_buf_size = radv_align_cmdbuf_size(cmd_stride * pInfo->maxSequencesCount);
VkDeviceSize upload_buf_size = upload_stride * pInfo->maxSequencesCount;
- pMemoryRequirements->memoryRequirements.memoryTypeBits =
- device->physical_device->memory_types_32bit;
+ pMemoryRequirements->memoryRequirements.memoryTypeBits = device->physical_device->memory_types_32bit;
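/* DGC command buffers are restricted to 32-bit-addressable memory types, presumably so the
 * command processor can fetch them through a 32-bit VA path. */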
pMemoryRequirements->memoryRequirements.alignment = 256;
pMemoryRequirements->memoryRequirements.size =
align(cmd_buf_size + upload_buf_size, pMemoryRequirements->memoryRequirements.alignment);
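/* The reported size covers the generated command stream plus the upload region for every
 * sequence, rounded up to the 256-byte alignment above. */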
/* This must always be called directly before a draw because it depends on the currently bound state. */
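WAIT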
void
-radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer,
- const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
+radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
{
- VK_FROM_HANDLE(radv_indirect_command_layout, layout,
- pGeneratedCommandsInfo->indirectCommandsLayout);
+ VK_FROM_HANDLE(radv_indirect_command_layout, layout, pGeneratedCommandsInfo->indirectCommandsLayout);
VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline);
VK_FROM_HANDLE(radv_buffer, prep_buffer, pGeneratedCommandsInfo->preprocessBuffer);
struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
uint32_t cmd_stride, upload_stride;
radv_get_sequence_size(layout, graphics_pipeline, &cmd_stride, &upload_stride);
- unsigned cmd_buf_size =
- radv_align_cmdbuf_size(cmd_stride * pGeneratedCommandsInfo->sequencesCount);
+ unsigned cmd_buf_size = radv_align_cmdbuf_size(cmd_stride * pGeneratedCommandsInfo->sequencesCount);
unsigned vb_size = layout->bind_vbo_mask ? util_bitcount(vs->info.vs.vb_desc_usage_mask) * 24 : 0;
unsigned const_size = graphics_pipeline->base.push_constant_size +
- 16 * graphics_pipeline->base.dynamic_offset_count +
- sizeof(layout->push_constant_offsets) + ARRAY_SIZE(graphics_pipeline->base.shaders) * 12;
+ 16 * graphics_pipeline->base.dynamic_offset_count + sizeof(layout->push_constant_offsets) +
+ ARRAY_SIZE(graphics_pipeline->base.shaders) * 12;
if (!layout->push_constant_mask)
const_size = 0;
void *upload_data_base = upload_data;
- radv_buffer_init(&token_buffer, cmd_buffer->device, cmd_buffer->upload.upload_bo, upload_size,
- upload_offset);
+ radv_buffer_init(&token_buffer, cmd_buffer->device, cmd_buffer->upload.upload_bo, upload_size, upload_offset);
- uint64_t upload_addr = radv_buffer_get_va(prep_buffer->bo) + prep_buffer->offset +
- pGeneratedCommandsInfo->preprocessOffset;
+ uint64_t upload_addr =
+ radv_buffer_get_va(prep_buffer->bo) + prep_buffer->offset + pGeneratedCommandsInfo->preprocessOffset;
- uint16_t vtx_base_sgpr =
- (cmd_buffer->state.graphics_pipeline->vtx_base_sgpr - SI_SH_REG_OFFSET) >> 2;
+ uint16_t vtx_base_sgpr = (cmd_buffer->state.graphics_pipeline->vtx_base_sgpr - SI_SH_REG_OFFSET) >> 2;
if (cmd_buffer->state.graphics_pipeline->uses_drawid)
vtx_base_sgpr |= DGC_USES_DRAWID;
if (cmd_buffer->state.graphics_pipeline->uses_baseinstance)
vtx_base_sgpr |= DGC_USES_BASEINSTANCE;
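/* The DGC_USES_* flags are assumed to sit in otherwise-unused high bits of vtx_base_sgpr,
 * packing the register index and the draw-id/base-instance usage into one 16-bit value. */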
- const struct radv_shader *vertex_shader =
- radv_get_shader(graphics_pipeline->base.shaders, MESA_SHADER_VERTEX);
- uint16_t vbo_sgpr = ((radv_get_user_sgpr(vertex_shader, AC_UD_VS_VERTEX_BUFFERS)->sgpr_idx * 4 +
- vertex_shader->info.user_data_0) -
- SI_SH_REG_OFFSET) >>
- 2;
+ const struct radv_shader *vertex_shader = radv_get_shader(graphics_pipeline->base.shaders, MESA_SHADER_VERTEX);
+ uint16_t vbo_sgpr =
+ ((radv_get_user_sgpr(vertex_shader, AC_UD_VS_VERTEX_BUFFERS)->sgpr_idx * 4 + vertex_shader->info.user_data_0) -
+ SI_SH_REG_OFFSET) >>
+ 2;
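/* Both SGPR values use the same encoding: the user-data register's byte offset relative to
 * SI_SH_REG_OFFSET, divided by four to get a dword index (suitable for a SET_SH_REG-style
 * packet, by assumption). */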
struct radv_dgc_params params = {
.cmd_buf_stride = cmd_stride,
.cmd_buf_size = cmd_buf_size,
.stream_stride = layout->input_stride,
.draw_indexed = layout->indexed,
.draw_params_offset = layout->draw_params_offset,
- .base_index_size =
- layout->binds_index_buffer ? 0 : radv_get_vgt_index_size(cmd_buffer->state.index_type),
+ .base_index_size = layout->binds_index_buffer ? 0 : radv_get_vgt_index_size(cmd_buffer->state.index_type),
.vtx_base_sgpr = vtx_base_sgpr,
.max_index_count = cmd_buffer->state.max_index_count,
.index_buffer_offset = layout->index_buffer_offset,
uint32_t attrib_end = graphics_pipeline->attrib_ends[i];
params.vbo_bind_mask |= ((layout->bind_vbo_mask >> binding) & 1u) << idx;
- vbo_info[2 * idx] = ((vertex_shader->info.vs.use_per_attribute_vb_descs ? 1u : 0u) << 31) |
- layout->vbo_offsets[binding];
+ vbo_info[2 * idx] =
+ ((vertex_shader->info.vs.use_per_attribute_vb_descs ? 1u : 0u) << 31) | layout->vbo_offsets[binding];
vbo_info[2 * idx + 1] = graphics_pipeline->attrib_index_offset[i] | (attrib_end << 16);
++idx;
}
unsigned inline_sgpr = 0;
if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) {
- upload_sgpr =
- (shader->info.user_data_0 + 4 * locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx -
- SI_SH_REG_OFFSET) >>
- 2;
+ upload_sgpr = (shader->info.user_data_0 + 4 * locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx -
+ SI_SH_REG_OFFSET) >>
+ 2;
}
if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) {
- inline_sgpr = (shader->info.user_data_0 +
- 4 * locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx -
+ inline_sgpr = (shader->info.user_data_0 + 4 * locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx -
SI_SH_REG_OFFSET) >>
2;
desc[idx * 3 + 1] = graphics_pipeline->base.shaders[i]->info.inline_push_constant_mask;
params.push_constant_shader_cnt = idx;
- params.const_copy_size = graphics_pipeline->base.push_constant_size +
- 16 * graphics_pipeline->base.dynamic_offset_count;
+ params.const_copy_size =
+ graphics_pipeline->base.push_constant_size + 16 * graphics_pipeline->base.dynamic_offset_count;
params.push_constant_mask = layout->push_constant_mask;
memcpy(upload_data, layout->push_constant_offsets, sizeof(layout->push_constant_offsets));
}
if (scissor_size) {
- params.scissor_offset = (char*)upload_data - (char*)upload_data_base;
+ params.scissor_offset = (char *)upload_data - (char *)upload_data_base;
params.scissor_count = scissor_size / 4;
- struct radeon_cmdbuf scissor_cs = {
- .buf = upload_data,
- .cdw = 0,
- .max_dw = scissor_size / 4
- };
+ struct radeon_cmdbuf scissor_cs = {.buf = upload_data, .cdw = 0, .max_dw = scissor_size / 4};
radv_write_scissors(cmd_buffer, &scissor_cs);
assert(scissor_cs.cdw * 4 == scissor_size);
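/* The current scissor packets are snapshotted into the upload stream here, so the generated
 * draws replay whatever scissor state was bound at prepare time. */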
VkWriteDescriptorSet ds_writes[5];
VkDescriptorBufferInfo buf_info[ARRAY_SIZE(ds_writes)];
int ds_cnt = 0;
- buf_info[ds_cnt] = (VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&token_buffer),
- .offset = 0,
- .range = upload_size};
+ buf_info[ds_cnt] =
+ (VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&token_buffer), .offset = 0, .range = upload_size};
ds_writes[ds_cnt] = (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = DGC_DESC_PARAMS,
.dstArrayElement = 0,
++ds_cnt;
if (pGeneratedCommandsInfo->streamCount > 0) {
- buf_info[ds_cnt] =
- (VkDescriptorBufferInfo){.buffer = pGeneratedCommandsInfo->pStreams[0].buffer,
- .offset = pGeneratedCommandsInfo->pStreams[0].offset,
- .range = VK_WHOLE_SIZE};
- ds_writes[ds_cnt] =
- (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = DGC_DESC_STREAM,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &buf_info[ds_cnt]};
+ buf_info[ds_cnt] = (VkDescriptorBufferInfo){.buffer = pGeneratedCommandsInfo->pStreams[0].buffer,
+ .offset = pGeneratedCommandsInfo->pStreams[0].offset,
+ .range = VK_WHOLE_SIZE};
+ ds_writes[ds_cnt] = (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = DGC_DESC_STREAM,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &buf_info[ds_cnt]};
++ds_cnt;
}
if (pGeneratedCommandsInfo->sequencesCountBuffer != VK_NULL_HANDLE) {
- buf_info[ds_cnt] =
- (VkDescriptorBufferInfo){.buffer = pGeneratedCommandsInfo->sequencesCountBuffer,
- .offset = pGeneratedCommandsInfo->sequencesCountOffset,
- .range = VK_WHOLE_SIZE};
- ds_writes[ds_cnt] =
- (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = DGC_DESC_COUNT,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &buf_info[ds_cnt]};
+ buf_info[ds_cnt] = (VkDescriptorBufferInfo){.buffer = pGeneratedCommandsInfo->sequencesCountBuffer,
+ .offset = pGeneratedCommandsInfo->sequencesCountOffset,
+ .range = VK_WHOLE_SIZE};
+ ds_writes[ds_cnt] = (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = DGC_DESC_COUNT,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &buf_info[ds_cnt]};
++ds_cnt;
params.sequence_count |= 1u << 31;
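/* Bit 31 is assumed to tell the prepare shader that the real count must be read from the
 * DGC_DESC_COUNT buffer rather than taken from the inline value. */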
}
- radv_meta_save(
- &saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
cmd_buffer->device->meta_state.dgc_prepare.pipeline);
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- cmd_buffer->device->meta_state.dgc_prepare.p_layout,
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), cmd_buffer->device->meta_state.dgc_prepare.p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(params), &params);
radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
- cmd_buffer->device->meta_state.dgc_prepare.p_layout, 0, ds_cnt,
- ds_writes);
+ cmd_buffer->device->meta_state.dgc_prepare.p_layout, 0, ds_cnt, ds_writes);
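/* One workgroup covers 64 sequences (round_up_u32 is assumed to behave like DIV_ROUND_UP here);
 * MAX2 guarantees at least one group when the count is zero. */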
unsigned block_count = MAX2(1, round_up_u32(pGeneratedCommandsInfo->sequencesCount, 64));
vk_common_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
radv_buffer_finish(&token_buffer);
radv_meta_restore(&saved_state, cmd_buffer);
- cmd_buffer->state.flush_bits |=
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2;
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2;
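/* The generated commands were written by a compute shader, so flush CS and invalidate the
 * vector and L2 caches before anything consumes them. */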
}
#include "radv_private.h"
void
-radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device,
- struct radeon_winsys_bo *bo)
+radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device, struct radeon_winsys_bo *bo)
{
memset(mem, 0, sizeof(*mem));
vk_object_base_init(&device->vk, &mem->base, VK_OBJECT_TYPE_DEVICE_MEMORY);
}
void
-radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
- struct radv_device_memory *mem)
+radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator, struct radv_device_memory *mem)
{
if (mem == NULL)
return;
assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
- const VkImportMemoryFdInfoKHR *import_info =
- vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
+ const VkImportMemoryFdInfoKHR *import_info = vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
const VkMemoryDedicatedAllocateInfo *dedicate_info =
vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
const VkExportMemoryAllocateInfo *export_info =
vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
if (pAllocateInfo->allocationSize == 0 && !ahb_import_info &&
- !(export_info && (export_info->handleTypes &
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID))) {
+ !(export_info &&
+ (export_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID))) {
/* Apparently, this is allowed */
*pMem = VK_NULL_HANDLE;
return VK_SUCCESS;
}
- mem =
- vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ mem = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (mem == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (replay_info && replay_info->opaqueCaptureAddress)
replay_address = replay_info->opaqueCaptureAddress;
- unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1,
- (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX));
+ unsigned priority =
+ MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1, (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX));
mem->user_ptr = NULL;
result = radv_import_ahb_memory(device, mem, priority, ahb_import_info);
if (result != VK_SUCCESS)
goto fail;
- } else if (export_info && (export_info->handleTypes &
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)) {
+ } else if (export_info &&
+ (export_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)) {
result = radv_create_ahb_memory(device, mem, priority, pAllocateInfo);
if (result != VK_SUCCESS)
goto fail;
close(import_info->fd);
}
- if (mem->image && mem->image->plane_count == 1 &&
- !vk_format_is_depth_or_stencil(mem->image->vk.format) && mem->image->vk.samples == 1 &&
- mem->image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
+ if (mem->image && mem->image->plane_count == 1 && !vk_format_is_depth_or_stencil(mem->image->vk.format) &&
+ mem->image->vk.samples == 1 && mem->image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
struct radeon_bo_metadata metadata;
device->ws->buffer_get_metadata(device->ws, mem->bo, &metadata);
- struct radv_image_create_info create_info = {.no_metadata_planes = true,
- .bo_metadata = &metadata};
+ struct radv_image_create_info create_info = {.no_metadata_planes = true, .bo_metadata = &metadata};
/* This gives a basic ability to import radeonsi images
* that don't have DCC. This is not guaranteed by any
}
} else if (host_ptr_info) {
assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
- result = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
- pAllocateInfo->allocationSize, priority, &mem->bo);
+ result = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer, pAllocateInfo->allocationSize,
+ priority, &mem->bo);
if (result != VK_SUCCESS) {
goto fail;
} else {
uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
uint32_t heap_index;
- heap_index =
- device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex]
- .heapIndex;
+ heap_index = device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex].heapIndex;
domain = device->physical_device->memory_domains[pAllocateInfo->memoryTypeIndex];
flags |= device->physical_device->memory_flags[pAllocateInfo->memoryTypeIndex];
flags |= RADEON_FLAG_ZERO_VRAM;
if (device->overallocation_disallowed) {
- uint64_t total_size =
- device->physical_device->memory_properties.memoryHeaps[heap_index].size;
+ uint64_t total_size = device->physical_device->memory_properties.memoryHeaps[heap_index].size;
mtx_lock(&device->overallocation_mutex);
if (device->allocated_memory_size[heap_index] + alloc_size > total_size) {
mtx_unlock(&device->overallocation_mutex);
}
- result = device->ws->buffer_create(device->ws, alloc_size,
- device->physical_device->rad_info.max_alignment, domain,
- flags, priority, replay_address, &mem->bo);
+ result = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
+ domain, flags, priority, replay_address, &mem->bo);
if (result != VK_SUCCESS) {
if (device->overallocation_disallowed) {
}
VKAPI_ATTR VkResult VKAPI_CALL
-radv_FlushMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
- const VkMappedMemoryRange *pMemoryRanges)
+radv_FlushMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount, const VkMappedMemoryRange *pMemoryRanges)
{
return VK_SUCCESS;
}
VKAPI_ATTR VkResult VKAPI_CALL
-radv_InvalidateMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
- const VkMappedMemoryRange *pMemoryRanges)
+radv_InvalidateMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount, const VkMappedMemoryRange *pMemoryRanges)
{
return VK_SUCCESS;
}
VKAPI_ATTR uint64_t VKAPI_CALL
-radv_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device,
- const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo)
+radv_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device, const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo)
{
RADV_FROM_HANDLE(radv_device_memory, mem, pInfo->memory);
return radv_buffer_get_va(mem->bo);
}
VKAPI_ATTR void VKAPI_CALL
-radv_GetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory memory,
- VkDeviceSize *pCommittedMemoryInBytes)
+radv_GetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory memory, VkDeviceSize *pCommittedMemoryInBytes)
{
*pCommittedMemoryInBytes = 0;
}
#include "radv_private.h"
static void
-radv_destroy_event(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
- struct radv_event *event)
+radv_destroy_event(struct radv_device *device, const VkAllocationCallbacks *pAllocator, struct radv_event *event)
{
if (event->bo)
device->ws->buffer_destroy(device->ws, event->bo);
struct radv_event *event;
VkResult result;
- event = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*event), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ event = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*event), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!event)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
bo_flags = RADEON_FLAG_CPU_ACCESS;
}
- result = device->ws->buffer_create(
- device->ws, 8, 8, bo_domain,
- RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_NO_INTERPROCESS_SHARING | bo_flags,
- RADV_BO_PRIORITY_FENCE, 0, &event->bo);
+ result = device->ws->buffer_create(device->ws, 8, 8, bo_domain,
+ RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_NO_INTERPROCESS_SHARING | bo_flags,
+ RADV_BO_PRIORITY_FENCE, 0, &event->bo);
if (result != VK_SUCCESS) {
radv_destroy_event(device, pAllocator, event);
return vk_error(device, result);
}
VKAPI_ATTR VkResult VKAPI_CALL
-radv_CreateEvent(VkDevice _device, const VkEventCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator, VkEvent *pEvent)
+radv_CreateEvent(VkDevice _device, const VkEventCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator,
+ VkEvent *pEvent)
{
RADV_FROM_HANDLE(radv_device, device, _device);
VkResult result = radv_create_event(device, pCreateInfo, pAllocator, pEvent, false);
#include "vk_android.h"
#include "vk_util.h"
-#include "ac_drm_fourcc.h"
#include "util/format_r11g11b10f.h"
#include "util/format_rgb9e5.h"
#include "util/format_srgb.h"
#include "util/half_float.h"
-#include "vulkan/util/vk_format.h"
#include "vulkan/util/vk_enum_defines.h"
+#include "vulkan/util/vk_format.h"
+#include "ac_drm_fourcc.h"
uint32_t
radv_translate_buffer_dataformat(const struct util_format_description *desc, int first_non_void)
}
uint32_t
-radv_translate_tex_dataformat(VkFormat format, const struct util_format_description *desc,
- int first_non_void)
+radv_translate_tex_dataformat(VkFormat format, const struct util_format_description *desc, int first_non_void)
{
bool uniform = true;
int i;
if (!uniform) {
switch (desc->nr_channels) {
case 3:
- if (desc->channel[0].size == 5 && desc->channel[1].size == 6 &&
- desc->channel[2].size == 5) {
+ if (desc->channel[0].size == 5 && desc->channel[1].size == 6 && desc->channel[2].size == 5) {
return V_008F14_IMG_DATA_FORMAT_5_6_5;
}
goto out_unknown;
case 4:
- if (desc->channel[0].size == 5 && desc->channel[1].size == 5 &&
- desc->channel[2].size == 5 && desc->channel[3].size == 1) {
+ if (desc->channel[0].size == 5 && desc->channel[1].size == 5 && desc->channel[2].size == 5 &&
+ desc->channel[3].size == 1) {
return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
}
- if (desc->channel[0].size == 1 && desc->channel[1].size == 5 &&
- desc->channel[2].size == 5 && desc->channel[3].size == 5) {
+ if (desc->channel[0].size == 1 && desc->channel[1].size == 5 && desc->channel[2].size == 5 &&
+ desc->channel[3].size == 5) {
return V_008F14_IMG_DATA_FORMAT_5_5_5_1;
}
- if (desc->channel[0].size == 10 && desc->channel[1].size == 10 &&
- desc->channel[2].size == 10 && desc->channel[3].size == 2) {
+ if (desc->channel[0].size == 10 && desc->channel[1].size == 10 && desc->channel[2].size == 10 &&
+ desc->channel[3].size == 2) {
/* The closed VK driver does this too: no 2/10/10/10 snorm. */
if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED && desc->channel[0].normalized)
goto out_unknown;
}
uint32_t
-radv_translate_tex_numformat(VkFormat format, const struct util_format_description *desc,
- int first_non_void)
+radv_translate_tex_numformat(VkFormat format, const struct util_format_description *desc, int first_non_void)
{
assert(vk_format_get_plane_count(format) == 1);
{
const struct util_format_description *desc = vk_format_description(format);
uint32_t num_format;
- if (format == VK_FORMAT_UNDEFINED || format == VK_FORMAT_R64_UINT ||
- format == VK_FORMAT_R64_SINT)
+ if (format == VK_FORMAT_UNDEFINED || format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT)
return false;
- num_format =
- radv_translate_tex_numformat(format, desc, vk_format_get_first_non_void_channel(format));
+ num_format = radv_translate_tex_numformat(format, desc, vk_format_get_first_non_void_channel(format));
- if (num_format == V_008F14_IMG_NUM_FORMAT_USCALED ||
- num_format == V_008F14_IMG_NUM_FORMAT_SSCALED)
+ if (num_format == V_008F14_IMG_NUM_FORMAT_USCALED || num_format == V_008F14_IMG_NUM_FORMAT_SSCALED)
return false;
if (num_format == V_008F14_IMG_NUM_FORMAT_UNORM || num_format == V_008F14_IMG_NUM_FORMAT_SNORM ||
bool
radv_is_atomic_format_supported(VkFormat format)
{
- return format == VK_FORMAT_R32_UINT || format == VK_FORMAT_R32_SINT ||
- format == VK_FORMAT_R32_SFLOAT || format == VK_FORMAT_R64_UINT ||
- format == VK_FORMAT_R64_SINT;
+ return format == VK_FORMAT_R32_UINT || format == VK_FORMAT_R32_SINT || format == VK_FORMAT_R32_SFLOAT ||
+ format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT;
}
bool
-radv_is_storage_image_format_supported(const struct radv_physical_device *physical_device,
- VkFormat format)
+radv_is_storage_image_format_supported(const struct radv_physical_device *physical_device, VkFormat format)
{
const struct util_format_description *desc = vk_format_description(format);
unsigned data_format, num_format;
if (vk_format_is_depth_or_stencil(format))
return false;
- data_format =
- radv_translate_tex_dataformat(format, desc, vk_format_get_first_non_void_channel(format));
- num_format =
- radv_translate_tex_numformat(format, desc, vk_format_get_first_non_void_channel(format));
+ data_format = radv_translate_tex_dataformat(format, desc, vk_format_get_first_non_void_channel(format));
+ num_format = radv_translate_tex_numformat(format, desc, vk_format_get_first_non_void_channel(format));
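/* The translate helpers return ~0 when a format has no hardware encoding. */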
if (data_format == ~0 || num_format == ~0)
return false;
if (format == VK_FORMAT_UNDEFINED)
return false;
- data_format =
- radv_translate_buffer_dataformat(desc, vk_format_get_first_non_void_channel(format));
+ data_format = radv_translate_buffer_dataformat(desc, vk_format_get_first_non_void_channel(format));
num_format = radv_translate_buffer_numformat(desc, vk_format_get_first_non_void_channel(format));
if (scaled)
- *scaled = (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) ||
- (num_format == V_008F0C_BUF_NUM_FORMAT_USCALED);
+ *scaled = (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) || (num_format == V_008F0C_BUF_NUM_FORMAT_USCALED);
return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID && num_format != ~0;
}
bool
-radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice, VkFormat format,
- bool *blendable)
+radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice, VkFormat format, bool *blendable)
{
const struct util_format_description *desc = vk_format_description(format);
uint32_t color_format = ac_get_cb_format(pdevice->rad_info.gfx_level, desc->format);
bool
radv_device_supports_etc(const struct radv_physical_device *physical_device)
{
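/* Only these parts decode ETC2 natively; everything else goes through the emulate_etc2 path
 * used by the format queries below. */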
- return physical_device->rad_info.family == CHIP_VEGA10 ||
- physical_device->rad_info.family == CHIP_RAVEN ||
- physical_device->rad_info.family == CHIP_RAVEN2 ||
- physical_device->rad_info.family == CHIP_STONEY;
+ return physical_device->rad_info.family == CHIP_VEGA10 || physical_device->rad_info.family == CHIP_RAVEN ||
+ physical_device->rad_info.family == CHIP_RAVEN2 || physical_device->rad_info.family == CHIP_STONEY;
}
static void
-radv_physical_device_get_format_properties(struct radv_physical_device *physical_device,
- VkFormat format, VkFormatProperties3 *out_properties)
+radv_physical_device_get_format_properties(struct radv_physical_device *physical_device, VkFormat format,
+ VkFormatProperties3 *out_properties)
{
VkFormatFeatureFlags2 linear = 0, tiled = 0, buffer = 0;
const struct util_format_description *desc = vk_format_description(format);
const bool multiplanar = vk_format_get_plane_count(format) > 1;
if (multiplanar || desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
- uint64_t tiling = VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT |
- VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT |
- VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT |
- VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+ uint64_t tiling = VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT |
+ VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
if (vk_format_get_ycbcr_info(format)) {
- tiling |= VK_FORMAT_FEATURE_2_COSITED_CHROMA_SAMPLES_BIT |
- VK_FORMAT_FEATURE_2_MIDPOINT_CHROMA_SAMPLES_BIT;
+ tiling |= VK_FORMAT_FEATURE_2_COSITED_CHROMA_SAMPLES_BIT | VK_FORMAT_FEATURE_2_MIDPOINT_CHROMA_SAMPLES_BIT;
/* The subsampled formats have no support for linear filters. */
if (desc->layout != UTIL_FORMAT_LAYOUT_SUBSAMPLED)
}
if (physical_device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE) {
- if (format == VK_FORMAT_G8_B8R8_2PLANE_420_UNORM ||
- format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16)
- tiling |= VK_FORMAT_FEATURE_2_VIDEO_DECODE_OUTPUT_BIT_KHR | VK_FORMAT_FEATURE_2_VIDEO_DECODE_DPB_BIT_KHR;
+ if (format == VK_FORMAT_G8_B8R8_2PLANE_420_UNORM ||
+ format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16)
+ tiling |= VK_FORMAT_FEATURE_2_VIDEO_DECODE_OUTPUT_BIT_KHR | VK_FORMAT_FEATURE_2_VIDEO_DECODE_DPB_BIT_KHR;
}
if (multiplanar)
tiling |= VK_FORMAT_FEATURE_2_DISJOINT_BIT;
/* Fails for unknown reasons with linear tiling & subsampled formats. */
- out_properties->linearTilingFeatures =
- desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED ? 0 : tiling;
+ out_properties->linearTilingFeatures = desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED ? 0 : tiling;
out_properties->optimalTilingFeatures = tiling;
out_properties->bufferFeatures = 0;
return;
}
if (radv_is_storage_image_format_supported(physical_device, format)) {
- tiled |= VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT |
- VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT |
+ tiled |= VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT |
VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT;
- linear |= VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT |
- VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT |
+ linear |= VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT |
VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT;
}
buffer |= VK_FORMAT_FEATURE_2_VERTEX_BUFFER_BIT;
if (radv_is_buffer_format_supported(format, &scaled)) {
- if (format != VK_FORMAT_R64_UINT && format != VK_FORMAT_R64_SINT && !scaled &&
- !vk_format_is_srgb(format))
+ if (format != VK_FORMAT_R64_UINT && format != VK_FORMAT_R64_SINT && !scaled && !vk_format_is_srgb(format))
buffer |= VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT;
- buffer |= VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT |
- VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT |
+ buffer |= VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT |
VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT;
}
case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
case VK_FORMAT_A2R10G10B10_SINT_PACK32:
case VK_FORMAT_A2B10G10R10_SINT_PACK32:
- buffer &=
- ~(VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT);
+ buffer &= ~(VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT);
linear = 0;
tiled = 0;
break;
if ((HAS_SWIZZLE(0, X) && HAS_SWIZZLE(1, Y)) || (HAS_SWIZZLE(0, X) && HAS_SWIZZLE(1, NONE)) ||
(HAS_SWIZZLE(0, NONE) && HAS_SWIZZLE(1, Y)))
return V_028C70_SWAP_STD; /* XY__ */
- else if ((HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(1, X)) ||
- (HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(1, NONE)) ||
+ else if ((HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(1, X)) || (HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(1, NONE)) ||
(HAS_SWIZZLE(0, NONE) && HAS_SWIZZLE(1, X)))
/* YX__ */
return (do_endian_swap ? V_028C70_SWAP_STD : V_028C70_SWAP_STD_REV);
return false;
}
} else {
- fprintf(stderr, "failed to fast clear for unhandled component type in format %d\n",
- format);
+ fprintf(stderr, "failed to fast clear for unhandled component type in format %d\n", format);
return false;
}
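/* Mask the channel value to its bit width and shift it into position in the packed
 * fast-clear word. */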
clear_val |= (v & ((1ULL << channel->size) - 1)) << channel->shift;
static void
radv_list_drm_format_modifiers(struct radv_physical_device *dev, VkFormat format,
- const VkFormatProperties3 *format_props,
- VkDrmFormatModifierPropertiesListEXT *mod_list)
+ const VkFormatProperties3 *format_props, VkDrmFormatModifierPropertiesListEXT *mod_list)
{
unsigned mod_count;
return;
}
- VK_OUTARRAY_MAKE_TYPED(VkDrmFormatModifierPropertiesEXT, out,
- mod_list->pDrmFormatModifierProperties,
+ VK_OUTARRAY_MAKE_TYPED(VkDrmFormatModifierPropertiesEXT, out, mod_list->pDrmFormatModifierProperties,
&mod_list->drmFormatModifierCount);
- ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options,
- vk_format_to_pipe_format(format), &mod_count, NULL);
+ ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options, vk_format_to_pipe_format(format), &mod_count,
+ NULL);
uint64_t *mods = malloc(mod_count * sizeof(uint64_t));
if (!mods) {
mod_list->drmFormatModifierCount = 0;
return;
}
- ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options,
- vk_format_to_pipe_format(format), &mod_count, mods);
+ ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options, vk_format_to_pipe_format(format), &mod_count,
+ mods);
for (unsigned i = 0; i < mod_count; ++i) {
- VkFormatFeatureFlags2 features =
- radv_get_modifier_flags(dev, format, mods[i], format_props);
+ VkFormatFeatureFlags2 features = radv_get_modifier_flags(dev, format, mods[i], format_props);
unsigned planes = vk_format_get_plane_count(format);
if (planes == 1) {
if (ac_modifier_has_dcc_retile(mods[i]))
if (!features)
continue;
- vk_outarray_append_typed(VkDrmFormatModifierPropertiesEXT, &out, out_props) {
- *out_props = (VkDrmFormatModifierPropertiesEXT) {
+ vk_outarray_append_typed(VkDrmFormatModifierPropertiesEXT, &out, out_props)
+ {
+ *out_props = (VkDrmFormatModifierPropertiesEXT){
.drmFormatModifier = mods[i],
.drmFormatModifierPlaneCount = planes,
- .drmFormatModifierTilingFeatures =
- vk_format_features2_to_features(features),
+ .drmFormatModifierTilingFeatures = vk_format_features2_to_features(features),
};
};
}
return;
}
- VK_OUTARRAY_MAKE_TYPED(VkDrmFormatModifierProperties2EXT, out,
- mod_list->pDrmFormatModifierProperties,
+ VK_OUTARRAY_MAKE_TYPED(VkDrmFormatModifierProperties2EXT, out, mod_list->pDrmFormatModifierProperties,
&mod_list->drmFormatModifierCount);
- ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options,
- vk_format_to_pipe_format(format), &mod_count, NULL);
+ ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options, vk_format_to_pipe_format(format), &mod_count,
+ NULL);
uint64_t *mods = malloc(mod_count * sizeof(uint64_t));
if (!mods) {
mod_list->drmFormatModifierCount = 0;
return;
}
- ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options,
- vk_format_to_pipe_format(format), &mod_count, mods);
+ ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options, vk_format_to_pipe_format(format), &mod_count,
+ mods);
for (unsigned i = 0; i < mod_count; ++i) {
- VkFormatFeatureFlags2 features =
- radv_get_modifier_flags(dev, format, mods[i], format_props);
+ VkFormatFeatureFlags2 features = radv_get_modifier_flags(dev, format, mods[i], format_props);
unsigned planes = vk_format_get_plane_count(format);
if (planes == 1) {
if (ac_modifier_has_dcc_retile(mods[i]))
if (!features)
continue;
- vk_outarray_append_typed(VkDrmFormatModifierProperties2EXT, &out, out_props) {
- *out_props = (VkDrmFormatModifierProperties2EXT) {
+ vk_outarray_append_typed(VkDrmFormatModifierProperties2EXT, &out, out_props)
+ {
+ *out_props = (VkDrmFormatModifierProperties2EXT){
.drmFormatModifier = mods[i],
.drmFormatModifierPlaneCount = planes,
.drmFormatModifierTilingFeatures = features,
}
static VkResult
-radv_check_modifier_support(struct radv_physical_device *dev,
- const VkPhysicalDeviceImageFormatInfo2 *info,
+radv_check_modifier_support(struct radv_physical_device *dev, const VkPhysicalDeviceImageFormatInfo2 *info,
VkImageFormatProperties *props, VkFormat format, uint64_t modifier)
{
const struct util_format_description *desc = vk_format_description(format);
return VK_ERROR_FORMAT_NOT_SUPPORTED;
/* We did not add modifiers for sparse textures. */
- if (info->flags & (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT |
- VK_IMAGE_CREATE_SPARSE_ALIASED_BIT))
+ if (info->flags &
+ (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT | VK_IMAGE_CREATE_SPARSE_ALIASED_BIT))
return VK_ERROR_FORMAT_NOT_SUPPORTED;
/*
.sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT,
};
- VkFormatProperties2 format_props2 = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
- .pNext = &mod_list};
+ VkFormatProperties2 format_props2 = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, .pNext = &mod_list};
- radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(dev), format,
- &format_props2);
+ radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(dev), format, &format_props2);
if (!mod_list.drmFormatModifierCount)
return VK_ERROR_FORMAT_NOT_SUPPORTED;
if (!mod_list.pDrmFormatModifierProperties)
return VK_ERROR_OUT_OF_HOST_MEMORY;
- radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(dev), format,
- &format_props2);
+ radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(dev), format, &format_props2);
bool found = false;
for (uint32_t i = 0; i < mod_list.drmFormatModifierCount && !found; ++i)
bool need_dcc_sign_reinterpret = false;
if (ac_modifier_has_dcc(modifier) &&
- !radv_are_formats_dcc_compatible(dev, info->pNext, format, info->flags,
- &need_dcc_sign_reinterpret) &&
+ !radv_are_formats_dcc_compatible(dev, info->pNext, format, info->flags, &need_dcc_sign_reinterpret) &&
!need_dcc_sign_reinterpret)
return VK_ERROR_FORMAT_NOT_SUPPORTED;
vk_format_features2_to_features(format_props.linearTilingFeatures);
pFormatProperties->formatProperties.optimalTilingFeatures =
vk_format_features2_to_features(format_props.optimalTilingFeatures);
- pFormatProperties->formatProperties.bufferFeatures =
- vk_format_features2_to_features(format_props.bufferFeatures);
+ pFormatProperties->formatProperties.bufferFeatures = vk_format_features2_to_features(format_props.bufferFeatures);
- VkFormatProperties3 *format_props_extended =
- vk_find_struct(pFormatProperties, FORMAT_PROPERTIES_3);
+ VkFormatProperties3 *format_props_extended = vk_find_struct(pFormatProperties, FORMAT_PROPERTIES_3);
if (format_props_extended) {
format_props_extended->linearTilingFeatures = format_props.linearTilingFeatures;
format_props_extended->optimalTilingFeatures = format_props.optimalTilingFeatures;
format_props_extended->bufferFeatures = format_props.bufferFeatures;
}
- radv_list_drm_format_modifiers(
- physical_device, format, &format_props,
- vk_find_struct(pFormatProperties, DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT));
- radv_list_drm_format_modifiers_2(
- physical_device, format, &format_props,
- vk_find_struct(pFormatProperties, DRM_FORMAT_MODIFIER_PROPERTIES_LIST_2_EXT));
+ radv_list_drm_format_modifiers(physical_device, format, &format_props,
+ vk_find_struct(pFormatProperties, DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT));
+ radv_list_drm_format_modifiers_2(physical_device, format, &format_props,
+ vk_find_struct(pFormatProperties, DRM_FORMAT_MODIFIER_PROPERTIES_LIST_2_EXT));
}
static VkResult
} else if (tiling == VK_IMAGE_TILING_OPTIMAL) {
format_feature_flags = format_props.optimalTilingFeatures;
} else if (tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
- format_feature_flags = radv_get_modifier_flags(physical_device, format,
- mod_info->drmFormatModifier, &format_props);
+ format_feature_flags =
+ radv_get_modifier_flags(physical_device, format, mod_info->drmFormatModifier, &format_props);
} else {
unreachable("bad VkImageTiling");
}
}
if (tiling == VK_IMAGE_TILING_OPTIMAL && info->type == VK_IMAGE_TYPE_2D &&
- (format_feature_flags & (VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT |
- VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
+ (format_feature_flags &
+ (VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
!(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) &&
!(info->usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)) {
sampleCounts |= VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
}
- if (tiling == VK_IMAGE_TILING_LINEAR &&
- (format == VK_FORMAT_R32G32B32_SFLOAT || format == VK_FORMAT_R32G32B32_SINT ||
- format == VK_FORMAT_R32G32B32_UINT)) {
+ if (tiling == VK_IMAGE_TILING_LINEAR && (format == VK_FORMAT_R32G32B32_SFLOAT ||
+ format == VK_FORMAT_R32G32B32_SINT || format == VK_FORMAT_R32G32B32_UINT)) {
/* R32G32B32 is a weird format and the driver currently only
* supports the bare minimum.
* TODO: Implement more if we really need to.
if (physical_device->rad_info.gfx_level >= GFX9 && info->type == VK_IMAGE_TYPE_3D &&
vk_format_get_blocksizebits(format) == 128 && vk_format_is_compressed(format) &&
(info->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) &&
- ((info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) ||
- (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))) {
+ ((info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) || (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))) {
goto unsupported;
}
* different format on GFX6.
*/
if (physical_device->rad_info.gfx_level == GFX6 && info->type == VK_IMAGE_TYPE_1D &&
- vk_format_is_block_compressed(format) &&
- (info->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) &&
- ((info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) ||
- (info->usage & VK_IMAGE_USAGE_STORAGE_BIT))) {
+ vk_format_is_block_compressed(format) && (info->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) &&
+ ((info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) || (info->usage & VK_IMAGE_USAGE_STORAGE_BIT))) {
goto unsupported;
}
}
if (image_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) {
- if (!(format_feature_flags & (VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT |
- VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT))) {
+ if (!(format_feature_flags &
+ (VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT))) {
goto unsupported;
}
}
goto unsupported;
}
- if ((info->flags &
- (VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT | VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT)) &&
+ if ((info->flags & (VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT | VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT)) &&
(desc->layout == UTIL_FORMAT_LAYOUT_ETC && physical_device->emulate_etc2)) {
goto unsupported;
}
switch (pImageFormatInfo->type) {
case VK_IMAGE_TYPE_2D:
- flags =
- VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
+ flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
break;
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
switch (pImageFormatInfo->type) {
case VK_IMAGE_TYPE_2D:
- flags =
- VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
+ flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
if (pImageFormatInfo->tiling != VK_IMAGE_TILING_LINEAR)
flags |= VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT;
format_properties->maxArrayLayers = MIN2(1, format_properties->maxArrayLayers);
format_properties->sampleCounts &= VK_SAMPLE_COUNT_1_BIT;
- flags = VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT |
- VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
+ flags = VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
/* advertise EXPORTABLE only when radv_create_ahb_memory supports the format */
if (radv_android_gralloc_supports_format(pImageFormatInfo->format, pImageFormatInfo->usage))
VkResult result;
VkFormat format = radv_select_android_external_format(base_info->pNext, base_info->format);
- result = radv_get_image_format_properties(physical_device, base_info, format,
- &base_props->imageFormatProperties);
+ result = radv_get_image_format_properties(physical_device, base_info, format, &base_props->imageFormatProperties);
if (result != VK_SUCCESS)
return result;
/* Extract input structs */
- vk_foreach_struct_const(s, base_info->pNext)
- {
+ vk_foreach_struct_const (s, base_info->pNext) {
switch (s->sType) {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO:
external_info = (const void *)s;
}
/* Extract output structs */
- vk_foreach_struct(s, base_props->pNext)
- {
+ vk_foreach_struct (s, base_props->pNext) {
switch (s->sType) {
case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES:
external_props = (void *)s;
}
}
- bool ahb_supported =
- physical_device->vk.supported_extensions.ANDROID_external_memory_android_hardware_buffer;
+ bool ahb_supported = physical_device->vk.supported_extensions.ANDROID_external_memory_android_hardware_buffer;
if (android_usage && ahb_supported) {
- android_usage->androidHardwareBufferUsage =
- vk_image_usage_to_ahb_usage(base_info->flags, base_info->usage);
+ android_usage->androidHardwareBufferUsage = vk_image_usage_to_ahb_usage(base_info->flags, base_info->usage);
}
/* From the Vulkan 1.0.97 spec:
* VK_ERROR_FORMAT_NOT_SUPPORTED.
*/
result = vk_errorf(physical_device, VK_ERROR_FORMAT_NOT_SUPPORTED,
- "unsupported VkExternalMemoryTypeFlagBitsKHR 0x%x",
- external_info->handleType);
+ "unsupported VkExternalMemoryTypeFlagBitsKHR 0x%x", external_info->handleType);
goto fail;
}
}
}
static void
-fill_sparse_image_format_properties(struct radv_physical_device *pdev, VkImageType type,
- VkFormat format, VkSparseImageFormatProperties *prop)
+fill_sparse_image_format_properties(struct radv_physical_device *pdev, VkImageType type, VkFormat format,
+ VkSparseImageFormatProperties *prop)
{
prop->aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
prop->flags = 0;
}
VKAPI_ATTR void VKAPI_CALL
-radv_GetPhysicalDeviceSparseImageFormatProperties2(
- VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo,
- uint32_t *pPropertyCount, VkSparseImageFormatProperties2 *pProperties)
+radv_GetPhysicalDeviceSparseImageFormatProperties2(VkPhysicalDevice physicalDevice,
+ const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo,
+ uint32_t *pPropertyCount,
+ VkSparseImageFormatProperties2 *pProperties)
{
RADV_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
VkResult result;
vk_outarray_append_typed(VkSparseImageFormatProperties2, &out, prop)
{
- fill_sparse_image_format_properties(pdev, pFormatInfo->type, pFormatInfo->format,
- &prop->properties);
+ fill_sparse_image_format_properties(pdev, pFormatInfo->type, pFormatInfo->format, &prop->properties);
};
}
VKAPI_ATTR void VKAPI_CALL
-radv_GetImageSparseMemoryRequirements2(VkDevice _device,
- const VkImageSparseMemoryRequirementsInfo2 *pInfo,
+radv_GetImageSparseMemoryRequirements2(VkDevice _device, const VkImageSparseMemoryRequirementsInfo2 *pInfo,
uint32_t *pSparseMemoryRequirementCount,
VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
{
vk_outarray_append_typed(VkSparseImageMemoryRequirements2, &out, req)
{
- fill_sparse_image_format_properties(device->physical_device, image->vk.image_type,
- image->vk.format,
+ fill_sparse_image_format_properties(device->physical_device, image->vk.image_type, image->vk.format,
&req->memoryRequirements.formatProperties);
req->memoryRequirements.imageMipTailFirstLod = image->planes[0].surface.first_mip_tail_level;
/* The tail is always a single tile per layer. */
req->memoryRequirements.imageMipTailSize = 65536;
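/* prt_level_offset is masked to 64 KiB granularity below because the tail occupies exactly
 * one PRT tile per layer. */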
req->memoryRequirements.imageMipTailOffset =
- image->planes[0]
- .surface.u.gfx9.prt_level_offset[req->memoryRequirements.imageMipTailFirstLod] &
- ~65535;
- req->memoryRequirements.imageMipTailStride =
- image->planes[0].surface.u.gfx9.surf_slice_size;
+ image->planes[0].surface.u.gfx9.prt_level_offset[req->memoryRequirements.imageMipTailFirstLod] & ~65535;
+ req->memoryRequirements.imageMipTailStride = image->planes[0].surface.u.gfx9.surf_slice_size;
} else {
req->memoryRequirements.imageMipTailOffset =
(uint64_t)image->planes[0]
.surface.u.legacy.level[req->memoryRequirements.imageMipTailFirstLod]
- .offset_256B * 256;
- req->memoryRequirements.imageMipTailSize =
- image->size - req->memoryRequirements.imageMipTailOffset;
+ .offset_256B *
+ 256;
+ req->memoryRequirements.imageMipTailSize = image->size - req->memoryRequirements.imageMipTailOffset;
req->memoryRequirements.imageMipTailStride = 0;
}
} else {
}
VKAPI_ATTR void VKAPI_CALL
-radv_GetDeviceImageSparseMemoryRequirements(VkDevice device,
- const VkDeviceImageMemoryRequirements* pInfo,
+radv_GetDeviceImageSparseMemoryRequirements(VkDevice device, const VkDeviceImageMemoryRequirements *pInfo,
uint32_t *pSparseMemoryRequirementCount,
VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
{
* creating an image.
* TODO: Avoid creating an image.
*/
- result = radv_image_create(
- device, &(struct radv_image_create_info){.vk_info = pInfo->pCreateInfo}, NULL, &image, true);
+ result =
+ radv_image_create(device, &(struct radv_image_create_info){.vk_info = pInfo->pCreateInfo}, NULL, &image, true);
assert(result == VK_SUCCESS);
VkImageSparseMemoryRequirementsInfo2 info2 = {
.image = image,
};
- radv_GetImageSparseMemoryRequirements2(device, &info2, pSparseMemoryRequirementCount,
- pSparseMemoryRequirements);
+ radv_GetImageSparseMemoryRequirements2(device, &info2, pSparseMemoryRequirementCount, pSparseMemoryRequirements);
radv_DestroyImage(device, image, NULL);
}
VKAPI_ATTR void VKAPI_CALL
-radv_GetPhysicalDeviceExternalBufferProperties(
- VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo,
- VkExternalBufferProperties *pExternalBufferProperties)
+radv_GetPhysicalDeviceExternalBufferProperties(VkPhysicalDevice physicalDevice,
+ const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo,
+ VkExternalBufferProperties *pExternalBufferProperties)
{
VkExternalMemoryFeatureFlagBits flags = 0;
VkExternalMemoryHandleTypeFlags export_flags = 0;
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
- compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
+ compat_flags = export_flags =
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
break;
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
/* Return the type of DCC encoding. */
static void
-radv_get_dcc_channel_type(const struct util_format_description *desc, enum dcc_channel_type *type,
- unsigned *size)
+radv_get_dcc_channel_type(const struct util_format_description *desc, enum dcc_channel_type *type, unsigned *size)
{
int i = util_format_get_first_non_void_channel(desc->format);
if (i == -1) {
/* Return if it's allowed to reinterpret one format as another with DCC enabled. */
bool
-radv_dcc_formats_compatible(enum amd_gfx_level gfx_level, VkFormat format1, VkFormat format2,
- bool *sign_reinterpret)
+radv_dcc_formats_compatible(enum amd_gfx_level gfx_level, VkFormat format1, VkFormat format2, bool *sign_reinterpret)
{
const struct util_format_description *desc1, *desc2;
enum dcc_channel_type type1, type2;
* IN THE SOFTWARE.
*/
-#include "ac_drm_fourcc.h"
-#include "util/u_debug.h"
#include "util/u_atomic.h"
+#include "util/u_debug.h"
#include "vulkan/util/vk_format.h"
+#include "ac_drm_fourcc.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_radeon_winsys.h"
#include "gfx10_format_table.h"
static unsigned
-radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
- VkFormat format)
+radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format)
{
if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
assert(pCreateInfo->samples <= 1);
}
static bool
-radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
- VkFormat format)
+radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format)
{
/* TC-compat HTILE is only available for GFX8+. */
if (device->physical_device->rad_info.gfx_level < GFX8)
/* Do not enable TC-compatible HTILE if the image isn't readable by a
* shader because no texture fetches will happen.
*/
- if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
- VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
+ if (!(pCreateInfo->usage &
+ (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
return false;
if (device->physical_device->rad_info.gfx_level < GFX9) {
* the driver allows TC-compat HTILE for 16-bit depth surfaces
* with no Z planes compression.
*/
- if (format != VK_FORMAT_D32_SFLOAT_S8_UINT && format != VK_FORMAT_D32_SFLOAT &&
- format != VK_FORMAT_D16_UNORM)
+ if (format != VK_FORMAT_D32_SFLOAT_S8_UINT && format != VK_FORMAT_D32_SFLOAT && format != VK_FORMAT_D16_UNORM)
return false;
}
}
static bool
-radv_image_use_fast_clear_for_image_early(const struct radv_device *device,
- const struct radv_image *image)
+radv_image_use_fast_clear_for_image_early(const struct radv_device *device, const struct radv_image *image)
{
if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
return true;
}
static bool
-radv_image_use_fast_clear_for_image(const struct radv_device *device,
- const struct radv_image *image)
+radv_image_use_fast_clear_for_image(const struct radv_device *device, const struct radv_image *image)
{
if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
return true;
- return radv_image_use_fast_clear_for_image_early(device, image) &&
- (image->exclusive ||
- /* Enable DCC for concurrent images if stores are
- * supported because that means we can keep DCC compressed on
- * all layouts/queues.
- */
- radv_image_use_dcc_image_stores(device, image));
+ return radv_image_use_fast_clear_for_image_early(device, image) && (image->exclusive ||
+ /* Enable DCC for concurrent images if stores are
+ * supported because that means we can keep DCC
+ * compressed on all layouts/queues.
+ */
+ radv_image_use_dcc_image_stores(device, image));
}
bool
-radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext,
- VkFormat format, VkImageCreateFlags flags, bool *sign_reinterpret)
+radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext, VkFormat format,
+ VkImageCreateFlags flags, bool *sign_reinterpret)
{
bool blendable;
if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
const struct VkImageFormatListCreateInfo *format_list =
- (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
- pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
+ (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
/* We have to ignore the existence of the list if viewFormatCount = 0 */
if (format_list && format_list->viewFormatCount) {
if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
continue;
- if (!radv_dcc_formats_compatible(pdev->rad_info.gfx_level, format,
- format_list->pViewFormats[i], sign_reinterpret))
+ if (!radv_dcc_formats_compatible(pdev->rad_info.gfx_level, format, format_list->pViewFormats[i],
+ sign_reinterpret))
return false;
}
} else {
}
static bool
-radv_formats_is_atomic_allowed(struct radv_device *device, const void *pNext, VkFormat format,
- VkImageCreateFlags flags)
+radv_formats_is_atomic_allowed(struct radv_device *device, const void *pNext, VkFormat format, VkImageCreateFlags flags)
{
if (radv_format_is_atomic_allowed(device, format))
return true;
if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
const struct VkImageFormatListCreateInfo *format_list =
- (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
- pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
+ (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
/* We have to ignore the existence of the list if viewFormatCount = 0 */
if (format_list && format_list->viewFormatCount) {
}
static bool
-radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *image,
- const VkImageCreateInfo *pCreateInfo, VkFormat format,
- bool *sign_reinterpret)
+radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *image, const VkImageCreateInfo *pCreateInfo,
+ VkFormat format, bool *sign_reinterpret)
{
/* DCC (Delta Color Compression) is only available for GFX8+. */
if (device->physical_device->rad_info.gfx_level < GFX8)
if (device->physical_device->rad_info.gfx_level == GFX11 && pCreateInfo->mipLevels > 1)
return false;
- return radv_are_formats_dcc_compatible(device->physical_device, pCreateInfo->pNext, format,
- pCreateInfo->flags, sign_reinterpret);
+ return radv_are_formats_dcc_compatible(device->physical_device, pCreateInfo->pNext, format, pCreateInfo->flags,
+ sign_reinterpret);
}
static bool
/* TODO: Fix storage images with DCC without DCC image stores.
* Disabling it for now. */
- if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
- !radv_image_use_dcc_image_stores(device, image))
+ if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && !radv_image_use_dcc_image_stores(device, image))
return false;
return true;
bool
radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image)
{
- return ac_surface_supports_dcc_image_stores(device->physical_device->rad_info.gfx_level,
- &image->planes[0].surface);
+ return ac_surface_supports_dcc_image_stores(device->physical_device->rad_info.gfx_level, &image->planes[0].surface);
}
/*
* - Investigate about mips+layers.
* - Enable on other gens.
*/
- bool use_htile_for_mips =
- image->vk.array_layers == 1 && device->physical_device->rad_info.gfx_level >= GFX10;
+ bool use_htile_for_mips = image->vk.array_layers == 1 && device->physical_device->rad_info.gfx_level >= GFX10;
/* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */
- if (device->physical_device->rad_info.gfx_level == GFX10 &&
- image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT && image->vk.mip_levels > 1)
+ if (device->physical_device->rad_info.gfx_level == GFX10 && image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT &&
+ image->vk.mip_levels > 1)
return false;
/* Do not enable HTILE for very small images because it seems less performant but make sure it's
return false;
/* GFX9 has issues when sample count is greater than 2 */
- if (device->physical_device->rad_info.gfx_level == GFX9 &&
- image->vk.samples > 2)
+ if (device->physical_device->rad_info.gfx_level == GFX9 && image->vk.samples > 2)
return false;
if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
return false;
/* TC-compat CMASK with storage images is supported on GFX10+. */
- if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
- device->physical_device->rad_info.gfx_level < GFX10)
+ if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && device->physical_device->rad_info.gfx_level < GFX10)
return false;
/* Do not enable TC-compatible if the image isn't readable by a shader
* because no texture fetches will happen.
*/
- if (!(image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
- VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
+ if (!(image->vk.usage &
+ (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
return false;
/* If the image doesn't have FMASK, it can't be fetchable. */
static VkResult
radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image,
- const struct radv_image_create_info *create_info,
- struct ac_surf_info *image_info)
+ const struct radv_image_create_info *create_info, struct ac_surf_info *image_info)
{
unsigned width = image->vk.extent.width;
unsigned height = image->vk.extent.height;
*
* Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
*/
- if (create_info->bo_metadata &&
- radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
+ if (create_info->bo_metadata && radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
const struct radeon_bo_metadata *md = create_info->bo_metadata;
if (device->physical_device->rad_info.gfx_level >= GFX10) {
static VkResult
radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image,
- const struct radv_image_create_info *create_info,
- struct ac_surf_info *image_info)
+ const struct radv_image_create_info *create_info, struct ac_surf_info *image_info)
{
VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
if (result != VK_SUCCESS)
for (unsigned plane = 0; plane < image->plane_count; ++plane) {
if (create_info->bo_metadata) {
- radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
- create_info->bo_metadata);
+ radv_patch_surface_from_metadata(device, &image->planes[plane].surface, create_info->bo_metadata);
}
if (radv_surface_has_scanout(device, create_info)) {
}
static VkFormat
-radv_image_get_plane_format(const struct radv_physical_device *pdev, const struct radv_image *image,
- unsigned plane)
+radv_image_get_plane_format(const struct radv_physical_device *pdev, const struct radv_image *image, unsigned plane)
{
- if (pdev->emulate_etc2 &&
- vk_format_description(image->vk.format)->layout == UTIL_FORMAT_LAYOUT_ETC) {
+ if (pdev->emulate_etc2 && vk_format_description(image->vk.format)->layout == UTIL_FORMAT_LAYOUT_ETC) {
if (plane == 0)
return image->vk.format;
return etc2_emulation_format(image->vk.format);
flags |= RADEON_SURF_NO_STENCIL_ADJUST;
}
- if (radv_use_htile_for_image(device, image) &&
- !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ) &&
+ if (radv_use_htile_for_image(device, image) && !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ) &&
!(flags & RADEON_SURF_NO_RENDER_TARGET)) {
if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
if (is_stencil)
flags |= RADEON_SURF_SBUFFER;
- if (device->physical_device->rad_info.gfx_level >= GFX9 &&
- pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
+ if (device->physical_device->rad_info.gfx_level >= GFX9 && pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format))
flags |= RADEON_SURF_NO_RENDER_TARGET;
- if (!radv_use_dcc_for_image_early(device, image, pCreateInfo, image_format,
- &image->dcc_sign_reinterpret))
+ if (!radv_use_dcc_for_image_early(device, image, pCreateInfo, image_format, &image->dcc_sign_reinterpret))
flags |= RADEON_SURF_DISABLE_DCC;
if (!radv_use_fmask_for_image(device, image))
flags |= RADEON_SURF_NO_FMASK;
if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
- flags |=
- RADEON_SURF_PRT | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE | RADEON_SURF_DISABLE_DCC;
+ flags |= RADEON_SURF_PRT | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE | RADEON_SURF_DISABLE_DCC;
}
/* Disable DCC for VRS rate images because the hw can't handle compression. */
for (unsigned i = 0; i < 4; i++)
swizzle[i] = desc->swizzle[i];
} else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
- const unsigned char swizzle_xxxx[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0,
- PIPE_SWIZZLE_1};
+ const unsigned char swizzle_xxxx[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0, PIPE_SWIZZLE_1};
vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
} else {
vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
}
void
-radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFormat vk_format,
- unsigned offset, unsigned range, uint32_t *state)
+radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFormat vk_format, unsigned offset,
+ unsigned range, uint32_t *state)
{
const struct util_format_description *desc;
unsigned stride;
range /= stride;
}
- rsrc_word3 = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
- S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
- S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
- S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3]));
+ rsrc_word3 = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
+ S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3]));
if (device->physical_device->rad_info.gfx_level >= GFX10) {
- const struct gfx10_format *fmt = &ac_get_gfx10_format_table(&device->physical_device->rad_info)[vk_format_to_pipe_format(vk_format)];
+ const struct gfx10_format *fmt =
+ &ac_get_gfx10_format_table(&device->physical_device->rad_info)[vk_format_to_pipe_format(vk_format)];
/* OOB_SELECT chooses the out-of-bounds check.
*
* else:
* offset+payload > NUM_RECORDS
*/
- rsrc_word3 |= S_008F0C_FORMAT(fmt->img_format) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
+ rsrc_word3 |= S_008F0C_FORMAT(fmt->img_format) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
S_008F0C_RESOURCE_LEVEL(device->physical_device->rad_info.gfx_level < GFX11);
} else {
num_format = radv_translate_buffer_numformat(desc, first_non_void);
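The surviving "offset+payload > NUM_RECORDS" line of the OOB_SELECT comment above is the whole rule for the structured-with-offset mode; a minimal sketch of that check (the name and signature are illustrative, not hardware or driver API):

#include <stdbool.h>
#include <stdint.h>

/* True when a buffer access is out of bounds under the
 * structured-with-offset check quoted in the comment above. */
static bool
oob_structured_with_offset(uint64_t offset, uint64_t payload, uint64_t num_records)
{
   return offset + payload > num_records;
}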
static void
si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *image,
- const struct legacy_surf_level *base_level_info, unsigned plane_id,
- unsigned base_level, unsigned first_level, unsigned block_width,
- bool is_stencil, bool is_storage_image, bool disable_compression,
- bool enable_write_compression, uint32_t *state,
+ const struct legacy_surf_level *base_level_info, unsigned plane_id, unsigned base_level,
+ unsigned first_level, unsigned block_width, bool is_stencil, bool is_storage_image,
+ bool disable_compression, bool enable_write_compression, uint32_t *state,
const struct ac_surf_nbc_view *nbc_view)
{
struct radv_image_plane *plane = &image->planes[plane_id];
* If an imported image is used with VK_IMAGE_VIEW_TYPE_2D_ARRAY, it may hang due to VM faults
* because DEPTH means pitch with 2D, but it means depth with 2D array.
*/
- if (device->physical_device->rad_info.gfx_level >= GFX10_3 &&
- image->vk.image_type == VK_IMAGE_TYPE_2D &&
- plane->surface.is_linear &&
- util_is_power_of_two_nonzero(plane->surface.bpe) &&
+ if (device->physical_device->rad_info.gfx_level >= GFX10_3 && image->vk.image_type == VK_IMAGE_TYPE_2D &&
+ plane->surface.is_linear && util_is_power_of_two_nonzero(plane->surface.bpe) &&
G_00A00C_TYPE(state[3]) == V_008F1C_SQ_RSRC_IMG_2D) {
assert((plane->surface.u.gfx9.surf_pitch * plane->surface.bpe) % 256 == 0);
unsigned pitch = plane->surface.u.gfx9.surf_pitch;
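The assert above is the precondition for re-encoding a linear 2D image's pitch into the descriptor; a tiny helper that mirrors it (illustrative only, not driver code):

static bool
linear_pitch_encodable(unsigned surf_pitch, unsigned bpe)
{
   /* The byte pitch must be 256-byte aligned, as asserted above. */
   return (surf_pitch * bpe) % 256 == 0;
}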
if (radv_dcc_enabled(image, first_level) && is_storage_image && enable_write_compression)
state[6] |= S_00A018_WRITE_COMPRESS_ENABLE(1);
- state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
- S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
+ state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) | S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
}
state[7] = meta_va >> 16;
state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.epitch);
}
- state[5] &=
- C_008F24_META_DATA_ADDRESS & C_008F24_META_PIPE_ALIGNED & C_008F24_META_RB_ALIGNED;
+ state[5] &= C_008F24_META_DATA_ADDRESS & C_008F24_META_PIPE_ALIGNED & C_008F24_META_RB_ALIGNED;
if (meta_va) {
struct gfx9_surf_meta_flags meta = {
.rb_aligned = 1,
if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER))
meta = plane->surface.u.gfx9.color.dcc;
- state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
- S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
+ state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) | S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
S_008F24_META_RB_ALIGNED(meta.rb_aligned);
}
} else {
}
static unsigned
-radv_tex_dim(VkImageType image_type, VkImageViewType view_type, unsigned nr_layers,
- unsigned nr_samples, bool is_storage_image, bool gfx9)
+radv_tex_dim(VkImageType image_type, VkImageViewType view_type, unsigned nr_layers, unsigned nr_samples,
+ bool is_storage_image, bool gfx9)
{
if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
* Build the sampler view descriptor for a texture (GFX10).
*/
static void
-gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
- bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
- const VkComponentMapping *mapping, unsigned first_level,
- unsigned last_level, unsigned first_layer, unsigned last_layer,
- unsigned width, unsigned height, unsigned depth, float min_lod,
- uint32_t *state, uint32_t *fmask_state,
- VkImageCreateFlags img_create_flags,
- const struct ac_surf_nbc_view *nbc_view,
- const VkImageViewSlicedCreateInfoEXT *sliced_3d)
+gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *image, bool is_storage_image,
+ VkImageViewType view_type, VkFormat vk_format, const VkComponentMapping *mapping,
+ unsigned first_level, unsigned last_level, unsigned first_layer, unsigned last_layer,
+ unsigned width, unsigned height, unsigned depth, float min_lod, uint32_t *state,
+ uint32_t *fmask_state, VkImageCreateFlags img_create_flags,
+ const struct ac_surf_nbc_view *nbc_view, const VkImageViewSlicedCreateInfoEXT *sliced_3d)
{
const struct util_format_description *desc;
enum pipe_swizzle swizzle[4];
/* For emulated ETC2 without alpha we need to override the format to a 3-component format, so
* that border colors work correctly (alpha forced to 1). Since Vulkan has no such format,
* this uses the Gallium formats to set the description. */
- if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK &&
- vk_format == VK_FORMAT_R8G8B8A8_UNORM) {
+ if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK && vk_format == VK_FORMAT_R8G8B8A8_UNORM) {
desc = util_format_description(PIPE_FORMAT_R8G8B8X8_UNORM);
- } else if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK &&
- vk_format == VK_FORMAT_R8G8B8A8_SRGB) {
+ } else if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK && vk_format == VK_FORMAT_R8G8B8A8_SRGB) {
desc = util_format_description(PIPE_FORMAT_R8G8B8X8_SRGB);
}
- img_format = ac_get_gfx10_format_table(&device->physical_device->rad_info)[vk_format_to_pipe_format(vk_format)].img_format;
+ img_format =
+ ac_get_gfx10_format_table(&device->physical_device->rad_info)[vk_format_to_pipe_format(vk_format)].img_format;
radv_compose_swizzle(desc, mapping, swizzle);
assert(image->vk.image_type == VK_IMAGE_TYPE_3D);
type = V_008F1C_SQ_RSRC_IMG_3D;
} else {
- type = radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, image->vk.samples,
- is_storage_image, device->physical_device->rad_info.gfx_level == GFX9);
+ type = radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, image->vk.samples, is_storage_image,
+ device->physical_device->rad_info.gfx_level == GFX9);
}
if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
depth = image->vk.array_layers / 6;
state[0] = 0;
- state[1] = S_00A004_FORMAT(img_format) |
- S_00A004_WIDTH_LO(width - 1);
+ state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1);
state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
S_00A008_RESOURCE_LEVEL(device->physical_device->rad_info.gfx_level < GFX11);
- state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
- S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
- S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
- S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
+ state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
+ S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
S_00A00C_BASE_LEVEL(image->vk.samples > 1 ? 0 : first_level) |
- S_00A00C_LAST_LEVEL(image->vk.samples > 1 ? util_logbase2(image->vk.samples)
- : last_level) |
+ S_00A00C_LAST_LEVEL(image->vk.samples > 1 ? util_logbase2(image->vk.samples) : last_level) |
S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc)) | S_00A00C_TYPE(type);
/* Depth is the last accessible layer on gfx9+. The hw doesn't need
* to know the total number of layers.
*/
- state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
- S_00A010_BASE_ARRAY(first_layer);
- state[5] = S_00A014_ARRAY_PITCH(0) |
- S_00A014_PERF_MOD(4);
+ state[4] =
+ S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) | S_00A010_BASE_ARRAY(first_layer);
+ state[5] = S_00A014_ARRAY_PITCH(0) | S_00A014_PERF_MOD(4);
state[6] = 0;
state[7] = 0;
assert(type == V_008F1C_SQ_RSRC_IMG_3D && is_storage_image);
unsigned first_slice = sliced_3d->sliceOffset;
- unsigned slice_count = sliced_3d->sliceCount == VK_REMAINING_3D_SLICES_EXT ?
- MAX2(1, total - sliced_3d->sliceOffset) : sliced_3d->sliceCount;
+ unsigned slice_count = sliced_3d->sliceCount == VK_REMAINING_3D_SLICES_EXT
+ ? MAX2(1, total - sliced_3d->sliceOffset)
+ : sliced_3d->sliceCount;
unsigned last_slice = first_slice + slice_count - 1;
state[4] = 0;
state[5] |= S_00A014_ARRAY_PITCH(1);
}
- unsigned max_mip =
- image->vk.samples > 1 ? util_logbase2(image->vk.samples) : image->vk.mip_levels - 1;
+ unsigned max_mip = image->vk.samples > 1 ? util_logbase2(image->vk.samples) : image->vk.mip_levels - 1;
if (nbc_view && nbc_view->valid)
max_mip = nbc_view->num_levels - 1;
}
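A worked example of the max_mip selection above, with illustrative values:

/* An 8-sample image folds its sample count into the mip field:
 * util_logbase2(8) == 3. A single-sample image with 10 mip levels
 * stores 10 - 1 == 9 instead. */
unsigned samples = 8, mip_levels = 10;
unsigned max_mip = samples > 1 ? util_logbase2(samples) : mip_levels - 1; /* == 3 here */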
if (radv_dcc_enabled(image, first_level)) {
- state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
- S_00A018_MAX_COMPRESSED_BLOCK_SIZE(
- image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size) |
- S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
+ state[6] |=
+ S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
+ S_00A018_MAX_COMPRESSED_BLOCK_SIZE(image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size) |
+ S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
}
if (radv_image_get_iterate256(device, image)) {
}
fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
- fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) |
- S_00A004_WIDTH_LO(width - 1);
- fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
- S_00A008_RESOURCE_LEVEL(1);
+ fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) | S_00A004_WIDTH_LO(width - 1);
+ fmask_state[2] =
+ S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) | S_00A008_RESOURCE_LEVEL(1);
fmask_state[3] =
S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode) |
- S_00A00C_TYPE(
- radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, 0, false, false));
+ S_00A00C_TYPE(radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, 0, false, false));
fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer);
fmask_state[5] = 0;
fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
* Build the sampler view descriptor for a texture (SI-GFX9)
*/
static void
-si_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
- bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
- const VkComponentMapping *mapping, unsigned first_level,
- unsigned last_level, unsigned first_layer, unsigned last_layer,
- unsigned width, unsigned height, unsigned depth, float min_lod,
- uint32_t *state, uint32_t *fmask_state,
- VkImageCreateFlags img_create_flags)
+si_make_texture_descriptor(struct radv_device *device, struct radv_image *image, bool is_storage_image,
+ VkImageViewType view_type, VkFormat vk_format, const VkComponentMapping *mapping,
+ unsigned first_level, unsigned last_level, unsigned first_layer, unsigned last_layer,
+ unsigned width, unsigned height, unsigned depth, float min_lod, uint32_t *state,
+ uint32_t *fmask_state, VkImageCreateFlags img_create_flags)
{
const struct util_format_description *desc;
enum pipe_swizzle swizzle[4];
/* For emulated ETC2 without alpha we need to override the format to a 3-component format, so
* that border colors work correctly (alpha forced to 1). Since Vulkan has no such format,
* this uses the Gallium formats to set the description. */
- if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK &&
- vk_format == VK_FORMAT_R8G8B8A8_UNORM) {
+ if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK && vk_format == VK_FORMAT_R8G8B8A8_UNORM) {
desc = util_format_description(PIPE_FORMAT_R8G8B8X8_UNORM);
- } else if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK &&
- vk_format == VK_FORMAT_R8G8B8A8_SRGB) {
+ } else if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK && vk_format == VK_FORMAT_R8G8B8A8_SRGB) {
desc = util_format_description(PIPE_FORMAT_R8G8B8X8_SRGB);
}
assert(image->vk.image_type == VK_IMAGE_TYPE_3D);
type = V_008F1C_SQ_RSRC_IMG_3D;
} else {
- type = radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, image->vk.samples,
- is_storage_image, device->physical_device->rad_info.gfx_level == GFX9);
+ type = radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, image->vk.samples, is_storage_image,
+ device->physical_device->rad_info.gfx_level == GFX9);
}
if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
depth = image->vk.array_layers / 6;
state[0] = 0;
- state[1] = (S_008F14_MIN_LOD(radv_float_to_ufixed(CLAMP(min_lod, 0, 15), 8)) |
- S_008F14_DATA_FORMAT(data_format) |
+ state[1] = (S_008F14_MIN_LOD(radv_float_to_ufixed(CLAMP(min_lod, 0, 15), 8)) | S_008F14_DATA_FORMAT(data_format) |
S_008F14_NUM_FORMAT(num_format));
state[2] = (S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1) | S_008F18_PERF_MOD(4));
- state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
- S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
- S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
- S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
+ state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
+ S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
S_008F1C_BASE_LEVEL(image->vk.samples > 1 ? 0 : first_level) |
- S_008F1C_LAST_LEVEL(image->vk.samples > 1 ? util_logbase2(image->vk.samples)
- : last_level) |
+ S_008F1C_LAST_LEVEL(image->vk.samples > 1 ? util_logbase2(image->vk.samples) : last_level) |
S_008F1C_TYPE(type));
state[4] = 0;
state[5] = S_008F24_BASE_ARRAY(first_layer);
state[4] |= S_008F20_DEPTH(last_layer);
state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
- state[5] |= S_008F24_MAX_MIP(image->vk.samples > 1 ? util_logbase2(image->vk.samples)
- : image->vk.mip_levels - 1);
+ state[5] |= S_008F24_MAX_MIP(image->vk.samples > 1 ? util_logbase2(image->vk.samples) : image->vk.mip_levels - 1);
} else {
state[3] |= S_008F1C_POW2_PAD(image->vk.mip_levels > 1);
state[4] |= S_008F20_DEPTH(depth - 1);
state[5] |= S_008F24_LAST_ARRAY(last_layer);
}
- if (!(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) &&
- image->planes[0].surface.meta_offset) {
+ if (!(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) && image->planes[0].surface.meta_offset) {
state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
} else {
if (device->instance->disable_aniso_single_level) {
fmask_state[0] = va >> 8;
fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
- fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(fmask_format) |
- S_008F14_NUM_FORMAT(num_format);
+ fmask_state[1] =
+ S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(fmask_format) | S_008F14_NUM_FORMAT(num_format);
fmask_state[2] = S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1);
fmask_state[3] =
S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
- S_008F1C_TYPE(
- radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, 0, false, false));
+ S_008F1C_TYPE(radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, 0, false, false));
fmask_state[4] = 0;
fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
fmask_state[6] = 0;
if (device->physical_device->rad_info.gfx_level == GFX9) {
fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode);
- fmask_state[4] |= S_008F20_DEPTH(last_layer) |
- S_008F20_PITCH(image->planes[0].surface.u.gfx9.color.fmask_epitch);
+ fmask_state[4] |=
+ S_008F20_DEPTH(last_layer) | S_008F20_PITCH(image->planes[0].surface.u.gfx9.color.fmask_epitch);
fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) | S_008F24_META_RB_ALIGNED(1);
if (radv_image_is_tc_compat_cmask(image)) {
fmask_state[7] |= va >> 8;
}
} else {
- fmask_state[3] |=
- S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.color.fmask.tiling_index);
- fmask_state[4] |=
- S_008F20_DEPTH(depth - 1) |
- S_008F20_PITCH(image->planes[0].surface.u.legacy.color.fmask.pitch_in_pixels - 1);
+ fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.color.fmask.tiling_index);
+ fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
+ S_008F20_PITCH(image->planes[0].surface.u.legacy.color.fmask.pitch_in_pixels - 1);
fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
if (radv_image_is_tc_compat_cmask(image)) {
}
static void
-radv_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
- bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
- const VkComponentMapping *mapping, unsigned first_level,
- unsigned last_level, unsigned first_layer, unsigned last_layer,
+radv_make_texture_descriptor(struct radv_device *device, struct radv_image *image, bool is_storage_image,
+ VkImageViewType view_type, VkFormat vk_format, const VkComponentMapping *mapping,
+ unsigned first_level, unsigned last_level, unsigned first_layer, unsigned last_layer,
unsigned width, unsigned height, unsigned depth, float min_lod, uint32_t *state,
uint32_t *fmask_state, VkImageCreateFlags img_create_flags,
- const struct ac_surf_nbc_view *nbc_view,
- const VkImageViewSlicedCreateInfoEXT *sliced_3d)
+ const struct ac_surf_nbc_view *nbc_view, const VkImageViewSlicedCreateInfoEXT *sliced_3d)
{
if (device->physical_device->rad_info.gfx_level >= GFX10) {
- gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
- first_level, last_level, first_layer, last_layer, width, height,
- depth, min_lod, state, fmask_state, img_create_flags, nbc_view,
- sliced_3d);
+ gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping, first_level,
+ last_level, first_layer, last_layer, width, height, depth, min_lod, state,
+ fmask_state, img_create_flags, nbc_view, sliced_3d);
} else {
- si_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
- first_level, last_level, first_layer, last_layer, width, height,
- depth, min_lod, state, fmask_state, img_create_flags);
+ si_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping, first_level,
+ last_level, first_layer, last_layer, width, height, depth, min_lod, state, fmask_state,
+ img_create_flags);
}
}
static void
-radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image,
- struct radeon_bo_metadata *md)
+radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image, struct radeon_bo_metadata *md)
{
static const VkComponentMapping fixedmapping;
uint32_t desc[8];
assert(image->plane_count == 1);
- radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->vk.image_type,
- image->vk.format, &fixedmapping, 0, image->vk.mip_levels - 1, 0,
- image->vk.array_layers - 1, image->vk.extent.width, image->vk.extent.height,
- image->vk.extent.depth, 0.0f, desc, NULL, 0, NULL, NULL);
+ radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->vk.image_type, image->vk.format,
+ &fixedmapping, 0, image->vk.mip_levels - 1, 0, image->vk.array_layers - 1,
+ image->vk.extent.width, image->vk.extent.height, image->vk.extent.depth, 0.0f, desc,
+ NULL, 0, NULL, NULL);
- si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0,
- 0, image->planes[0].surface.blk_w, false, false, false, false,
- desc, NULL);
+ si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0,
+ image->planes[0].surface.blk_w, false, false, false, false, desc, NULL);
- ac_surface_compute_umd_metadata(&device->physical_device->rad_info, &image->planes[0].surface,
- image->vk.mip_levels, desc, &md->size_metadata, md->metadata,
+ ac_surface_compute_umd_metadata(&device->physical_device->rad_info, &image->planes[0].surface, image->vk.mip_levels,
+ desc, &md->size_metadata, md->metadata,
device->instance->debug_flags & RADV_DEBUG_EXTRA_MD);
}
void
-radv_init_metadata(struct radv_device *device, struct radv_image *image,
- struct radeon_bo_metadata *metadata)
+radv_init_metadata(struct radv_device *device, struct radv_image *image, struct radeon_bo_metadata *metadata)
{
struct radeon_surf *surface = &image->planes[0].surface;
if (device->physical_device->rad_info.gfx_level >= GFX9) {
uint64_t dcc_offset =
- image->bindings[0].offset +
- (surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset);
+ image->bindings[0].offset + (surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset);
metadata->u.gfx9.swizzle_mode = surface->u.gfx9.swizzle_mode;
metadata->u.gfx9.dcc_offset_256b = dcc_offset >> 8;
metadata->u.gfx9.dcc_pitch_max = surface->u.gfx9.color.display_dcc_pitch_max;
metadata->u.gfx9.dcc_independent_64b_blocks = surface->u.gfx9.color.dcc.independent_64B_blocks;
metadata->u.gfx9.dcc_independent_128b_blocks = surface->u.gfx9.color.dcc.independent_128B_blocks;
- metadata->u.gfx9.dcc_max_compressed_block_size =
- surface->u.gfx9.color.dcc.max_compressed_block_size;
+ metadata->u.gfx9.dcc_max_compressed_block_size = surface->u.gfx9.color.dcc.max_compressed_block_size;
metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
} else {
- metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D
- ? RADEON_LAYOUT_TILED
- : RADEON_LAYOUT_LINEAR;
- metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D
- ? RADEON_LAYOUT_TILED
- : RADEON_LAYOUT_LINEAR;
+ metadata->u.legacy.microtile =
+ surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ? RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
+ metadata->u.legacy.macrotile =
+ surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ? RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
metadata->u.legacy.bankw = surface->u.legacy.bankw;
metadata->u.legacy.bankh = surface->u.legacy.bankh;
}
void
-radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image,
- uint64_t offset, uint32_t stride)
+radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image, uint64_t offset,
+ uint32_t stride)
{
ac_surface_override_offset_stride(&device->physical_device->rad_info, &image->planes[0].surface,
image->vk.array_layers, image->vk.mip_levels, offset, stride);
}
static void
-radv_image_alloc_single_sample_cmask(const struct radv_device *device,
- const struct radv_image *image, struct radeon_surf *surf)
+radv_image_alloc_single_sample_cmask(const struct radv_device *device, const struct radv_image *image,
+ struct radeon_surf *surf)
{
if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 || image->vk.mip_levels > 1 ||
- image->vk.extent.depth > 1 || radv_image_has_dcc(image) ||
- !radv_image_use_fast_clear_for_image(device, image) ||
+ image->vk.extent.depth > 1 || radv_image_has_dcc(image) || !radv_image_use_fast_clear_for_image(device, image) ||
(image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT))
return;
image->size += 8 * image->vk.mip_levels;
}
- if ((radv_image_has_dcc(image) && !image->support_comp_to_single) ||
- radv_image_has_cmask(image) || radv_image_has_htile(image)) {
+ if ((radv_image_has_dcc(image) && !image->support_comp_to_single) || radv_image_has_cmask(image) ||
+ radv_image_has_htile(image)) {
image->clear_value_offset = image->size;
image->size += 8 * image->vk.mip_levels;
}
- if (radv_image_is_tc_compat_htile(image) &&
- device->physical_device->rad_info.has_tc_compat_zrange_bug) {
+ if (radv_image_is_tc_compat_htile(image) && device->physical_device->rad_info.has_tc_compat_zrange_bug) {
/* Metadata for the TC-compatible HTILE hardware bug which
* has to be fixed by updating ZRANGE_PRECISION when doing
* fast depth clears to 0.0f.
radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
{
if (device->physical_device->rad_info.gfx_level >= GFX10) {
- return !device->physical_device->rad_info.tcc_rb_non_coherent &&
- !radv_image_is_pipe_misaligned(device, image);
+ return !device->physical_device->rad_info.tcc_rb_non_coherent && !radv_image_is_pipe_misaligned(device, image);
} else if (device->physical_device->rad_info.gfx_level == GFX9) {
if (image->vk.samples == 1 &&
- (image->vk.usage &
- (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
+ (image->vk.usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
!vk_format_has_stencil(image->vk.format)) {
/* Single-sample color and single-sample depth
* (not stencil) are coherent with shaders on
info.num_channels = vk_format_get_nr_components(image->vk.format);
if (!vk_format_is_depth_or_stencil(image->vk.format) && !image->shareable &&
- !(image->vk.create_flags & (VK_IMAGE_CREATE_SPARSE_ALIASED_BIT |
- VK_IMAGE_CREATE_ALIAS_BIT)) &&
+ !(image->vk.create_flags & (VK_IMAGE_CREATE_SPARSE_ALIASED_BIT | VK_IMAGE_CREATE_ALIAS_BIT)) &&
image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
info.surf_index = &device->image_mrt_offset_counter;
}
VkResult
radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
- const struct VkVideoProfileListInfoKHR *profile_list,
- struct radv_image *image)
+ const struct VkVideoProfileListInfoKHR *profile_list, struct radv_image *image)
{
/* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
* common internal case. */
info.height = vk_format_get_plane_height(image->vk.format, plane, info.height);
if (create_info.no_metadata_planes || plane_count > 1) {
- image->planes[plane].surface.flags |=
- RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE;
+ image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE;
}
device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);
}
if (create_info.bo_metadata && !mod_info &&
- !ac_surface_apply_umd_metadata(&device->physical_device->rad_info,
- &image->planes[plane].surface, image->vk.samples,
- image->vk.mip_levels, create_info.bo_metadata->size_metadata,
- create_info.bo_metadata->metadata))
+ !ac_surface_apply_umd_metadata(&device->physical_device->rad_info, &image->planes[plane].surface,
+ image->vk.samples, image->vk.mip_levels,
+ create_info.bo_metadata->size_metadata, create_info.bo_metadata->metadata))
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
- if (!create_info.no_metadata_planes && !create_info.bo_metadata && plane_count == 1 &&
- !mod_info)
+ if (!create_info.no_metadata_planes && !create_info.bo_metadata && plane_count == 1 && !mod_info)
radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);
if (mod_info) {
offset = mod_info->pPlaneLayouts[plane].offset;
stride = mod_info->pPlaneLayouts[plane].rowPitch / image->planes[plane].surface.bpe;
} else {
- offset = image->disjoint ? 0 :
- align64(image->size, 1ull << image->planes[plane].surface.alignment_log2);
+ offset = image->disjoint ? 0 : align64(image->size, 1ull << image->planes[plane].surface.alignment_log2);
stride = 0; /* 0 means no override */
}
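How the non-disjoint offset lands, assuming align64 is the usual power-of-two round-up (x + a - 1) & ~(a - 1); the numbers are purely illustrative:

uint64_t a = 1ull << 16;                       /* alignment_log2 == 16 */
uint64_t offset = (100000 + a - 1) & ~(a - 1); /* 100000 rounds up to 131072 */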
- if (!ac_surface_override_offset_stride(&device->physical_device->rad_info,
- &image->planes[plane].surface,
- image->vk.array_layers, image->vk.mip_levels,
- offset, stride))
+ if (!ac_surface_override_offset_stride(&device->physical_device->rad_info, &image->planes[plane].surface,
+ image->vk.array_layers, image->vk.mip_levels, offset, stride))
return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
/* Validate DCC offsets in modifier layout. */
return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
for (unsigned i = 1; i < mem_planes; ++i) {
- if (ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level,
- &image->planes[plane].surface, i,
- 0) != mod_info->pPlaneLayouts[i].offset)
+ if (ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level, &image->planes[plane].surface,
+ i, 0) != mod_info->pPlaneLayouts[i].offset)
return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
}
}
image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size);
image->alignment = MAX2(image->alignment, 1 << image->planes[plane].surface.alignment_log2);
- image->planes[plane].format =
- radv_image_get_plane_format(device->physical_device, image, plane);
+ image->planes[plane].format = radv_image_get_plane_format(device->physical_device, image, plane);
}
- image->tc_compatible_cmask =
- radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image);
+ image->tc_compatible_cmask = radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image);
image->l2_coherent = radv_image_is_l2_coherent(device, image);
}
static void
-radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
- struct radv_image *image)
+radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAllocator, struct radv_image *image)
{
if ((image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bindings[0].bo) {
radv_rmv_log_bo_destroy(device, image->bindings[0].bo);
const struct radv_image_plane *plane = &image->planes[i];
const struct radeon_surf *surf = &plane->surface;
const struct util_format_description *desc = vk_format_description(plane->format);
- uint64_t offset = ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level,
- &plane->surface, 0, 0);
+ uint64_t offset = ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level, &plane->surface, 0, 0);
fprintf(stderr, " Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset);
.dcc_retile = true,
};
- ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
- &mod_count, NULL);
+ ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format), &mod_count, NULL);
uint64_t *mods = calloc(mod_count, sizeof(*mods));
if (!mods)
return mod_list->pDrmFormatModifiers[0];
- ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
- &mod_count, mods);
+ ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format), &mod_count, mods);
for (unsigned i = 0; i < mod_count; ++i) {
for (uint32_t j = 0; j < mod_list->drmFormatModifierCount; ++j) {
radv_assert(pCreateInfo->extent.height > 0);
radv_assert(pCreateInfo->extent.depth > 0);
- image =
- vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ image = vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!image)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
else
- image->queue_family_mask |= 1u << vk_queue_to_radv(device->physical_device,
- pCreateInfo->pQueueFamilyIndices[i]);
+ image->queue_family_mask |=
+ 1u << vk_queue_to_radv(device->physical_device, pCreateInfo->pQueueFamilyIndices[i]);
}
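A hedged illustration of the mask built in this loop, taking RADV_MAX_QUEUE_FAMILIES == 3 purely as an example value:

uint32_t mask = 0;
mask |= (1u << 3) - 1u; /* external/foreign family: 0b111, every family */
mask |= 1u << 1;        /* an ordinary family: only that family's bit   */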
const VkExternalMemoryImageCreateInfo *external_info =
modifier = explicit_mod->drmFormatModifier;
for (unsigned plane = 0; plane < plane_count; ++plane) {
- image->planes[plane].surface.flags =
- radv_get_surface_flags(device, image, plane, pCreateInfo, format);
+ image->planes[plane].surface.flags = radv_get_surface_flags(device, image, plane, pCreateInfo, format);
image->planes[plane].surface.modifier = modifier;
}
- if (image->vk.external_handle_types &
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID) {
+ if (image->vk.external_handle_types & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID) {
#ifdef ANDROID
image->vk.ahb_format = radv_ahb_format_for_vk_format(image->vk.format);
#endif
image->size = align64(image->size, image->alignment);
image->bindings[0].offset = 0;
- result =
- device->ws->buffer_create(device->ws, image->size, image->alignment, 0,
- RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, 0,
- &image->bindings[0].bo);
+ result = device->ws->buffer_create(device->ws, image->size, image->alignment, 0, RADEON_FLAG_VIRTUAL,
+ RADV_BO_PRIORITY_VIRTUAL, 0, &image->bindings[0].bo);
if (result != VK_SUCCESS) {
radv_destroy_image(device, alloc, image);
return vk_error(device, result);
}
static inline void
-compute_non_block_compressed_view(struct radv_device *device,
- const struct radv_image_view *iview,
+compute_non_block_compressed_view(struct radv_device *device, const struct radv_image_view *iview,
struct ac_surf_nbc_view *nbc_view)
{
const struct radv_image *image = iview->image;
struct ac_addrlib *addrlib = device->ws->get_addrlib(device->ws);
struct ac_surf_info surf_info = radv_get_ac_surf_info(device, image);
- ac_surface_compute_nbc_view(addrlib, &device->physical_device->rad_info, surf, &surf_info,
- iview->vk.base_mip_level, iview->vk.base_array_layer, nbc_view);
+ ac_surface_compute_nbc_view(addrlib, &device->physical_device->rad_info, surf, &surf_info, iview->vk.base_mip_level,
+ iview->vk.base_array_layer, nbc_view);
}
static void
-radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_device *device,
- VkFormat vk_format, const VkComponentMapping *components,
- float min_lod,
- bool is_storage_image, bool disable_compression,
- bool enable_compression, unsigned plane_id,
+radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_device *device, VkFormat vk_format,
+ const VkComponentMapping *components, float min_lod, bool is_storage_image,
+ bool disable_compression, bool enable_compression, unsigned plane_id,
unsigned descriptor_plane_id, VkImageCreateFlags img_create_flags,
const struct ac_surf_nbc_view *nbc_view,
const VkImageViewSlicedCreateInfoEXT *sliced_3d)
assert(vk_format_get_plane_count(vk_format) == 1);
assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
- blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) *
- vk_format_get_blockwidth(vk_format);
+ blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);
if (device->physical_device->rad_info.gfx_level >= GFX9) {
hw_level = iview->vk.base_mip_level;
}
}
- radv_make_texture_descriptor(
- device, image, is_storage_image, iview->vk.view_type, vk_format, components, hw_level,
- hw_level + iview->vk.level_count - 1, first_layer,
- iview->vk.base_array_layer + iview->vk.layer_count - 1,
- vk_format_get_plane_width(image->vk.format, plane_id, iview->extent.width),
- vk_format_get_plane_height(image->vk.format, plane_id, iview->extent.height),
- iview->extent.depth, min_lod, descriptor->plane_descriptors[descriptor_plane_id],
- descriptor_plane_id || is_storage_image ? NULL : descriptor->fmask_descriptor,
- img_create_flags, nbc_view, sliced_3d);
+ radv_make_texture_descriptor(device, image, is_storage_image, iview->vk.view_type, vk_format, components, hw_level,
+ hw_level + iview->vk.level_count - 1, first_layer,
+ iview->vk.base_array_layer + iview->vk.layer_count - 1,
+ vk_format_get_plane_width(image->vk.format, plane_id, iview->extent.width),
+ vk_format_get_plane_height(image->vk.format, plane_id, iview->extent.height),
+ iview->extent.depth, min_lod, descriptor->plane_descriptors[descriptor_plane_id],
+ descriptor_plane_id || is_storage_image ? NULL : descriptor->fmask_descriptor,
+ img_create_flags, nbc_view, sliced_3d);
const struct legacy_surf_level *base_level_info = NULL;
if (device->physical_device->rad_info.gfx_level <= GFX9) {
if (is_storage_image && !(enable_write_compression || enable_compression))
disable_compression = true;
si_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, iview->vk.base_mip_level,
- iview->vk.base_mip_level, blk_w, is_stencil, is_storage_image,
- disable_compression, enable_write_compression,
- descriptor->plane_descriptors[descriptor_plane_id], nbc_view);
+ iview->vk.base_mip_level, blk_w, is_stencil, is_storage_image, disable_compression,
+ enable_write_compression, descriptor->plane_descriptors[descriptor_plane_id],
+ nbc_view);
}
static unsigned
* Determine if the given image view can be fast cleared.
*/
static bool
-radv_image_view_can_fast_clear(const struct radv_device *device,
- const struct radv_image_view *iview)
+radv_image_view_can_fast_clear(const struct radv_device *device, const struct radv_image_view *iview)
{
struct radv_image *image;
void
radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
- const VkImageViewCreateInfo *pCreateInfo,
- VkImageCreateFlags img_create_flags,
+ const VkImageViewCreateInfo *pCreateInfo, VkImageCreateFlags img_create_flags,
const struct radv_image_view_extra_create_info *extra_create_info)
{
RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
switch (image->vk.image_type) {
case VK_IMAGE_TYPE_1D:
case VK_IMAGE_TYPE_2D:
- assert(range->baseArrayLayer + vk_image_subresource_layer_count(&image->vk, range) - 1 <=
- image->vk.array_layers);
+ assert(range->baseArrayLayer + vk_image_subresource_layer_count(&image->vk, range) - 1 <= image->vk.array_layers);
break;
case VK_IMAGE_TYPE_3D:
assert(range->baseArrayLayer + vk_image_subresource_layer_count(&image->vk, range) - 1 <=
* block compatible format and the compressed format, so even if we take
* the plain converted dimensions the physical layout is correct.
*/
- if (device->physical_device->rad_info.gfx_level >= GFX9 &&
- vk_format_is_block_compressed(image->vk.format) &&
+ if (device->physical_device->rad_info.gfx_level >= GFX9 && vk_format_is_block_compressed(image->vk.format) &&
!vk_format_is_block_compressed(iview->vk.format)) {
/* If we have multiple levels in the view we should ideally take the last level,
* but the mip calculation has a max(..., 1) so walking back to the base mip in an
for (unsigned i = 0; i < plane_count; ++i) {
VkFormat format = vk_format_get_plane_format(iview->vk.view_format, i);
radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, min_lod, false,
- disable_compression, enable_compression, iview->plane_id + i,
- i, img_create_flags, &iview->nbc_view, NULL);
+ disable_compression, enable_compression, iview->plane_id + i, i, img_create_flags,
+ &iview->nbc_view, NULL);
radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, min_lod, true,
- disable_compression, enable_compression, iview->plane_id + i,
- i, img_create_flags, &iview->nbc_view, sliced_3d);
+ disable_compression, enable_compression, iview->plane_id + i, i, img_create_flags,
+ &iview->nbc_view, sliced_3d);
}
}
}
bool
-radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image,
- VkImageLayout layout, unsigned queue_mask)
+radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image, VkImageLayout layout,
+ unsigned queue_mask)
{
switch (layout) {
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL:
if (radv_image_is_tc_compat_htile(image) ||
(radv_image_has_htile(image) &&
- !(image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
- VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))) {
+ !(image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))) {
/* Keep HTILE compressed if the image is only going to
* be used as a depth/stencil read-only attachment.
*/
}
bool
-radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image,
- unsigned level, VkImageLayout layout,
- unsigned queue_mask)
+radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image, unsigned level,
+ VkImageLayout layout, unsigned queue_mask)
{
- if (radv_dcc_enabled(image, level) &&
- !radv_layout_dcc_compressed(device, image, level, layout, queue_mask))
+ if (radv_dcc_enabled(image, level) && !radv_layout_dcc_compressed(device, image, level, layout, queue_mask))
return false;
if (!(image->vk.usage & RADV_IMAGE_USAGE_WRITE_BITS))
return false;
- if (layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
- layout != VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL)
+ if (layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && layout != VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL)
return false;
/* Exclusive images with CMASK or DCC can always be fast-cleared on the gfx queue. Concurrent
}
bool
-radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image,
- unsigned level, VkImageLayout layout, unsigned queue_mask)
+radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image, unsigned level,
+ VkImageLayout layout, unsigned queue_mask)
{
if (!radv_dcc_enabled(image, level))
return false;
- if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT &&
- queue_mask & (1u << RADV_QUEUE_FOREIGN))
+ if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && queue_mask & (1u << RADV_QUEUE_FOREIGN))
return true;
/* If the image is read-only, we can always just keep it compressed */
}
enum radv_fmask_compression
-radv_layout_fmask_compression(const struct radv_device *device, const struct radv_image *image,
- VkImageLayout layout, unsigned queue_mask)
+radv_layout_fmask_compression(const struct radv_device *device, const struct radv_image *image, VkImageLayout layout,
+ unsigned queue_mask)
{
if (!radv_image_has_fmask(image))
return RADV_FMASK_COMPRESSION_NONE;
return RADV_FMASK_COMPRESSION_NONE;
default:
/* Don't compress images that are concurrent. */
- return queue_mask == (1u << RADV_QUEUE_GENERAL) ?
- RADV_FMASK_COMPRESSION_FULL : RADV_FMASK_COMPRESSION_NONE;
+ return queue_mask == (1u << RADV_QUEUE_GENERAL) ? RADV_FMASK_COMPRESSION_FULL : RADV_FMASK_COMPRESSION_NONE;
}
}
unsigned
-radv_image_queue_family_mask(const struct radv_image *image,
- enum radv_queue_family family,
+radv_image_queue_family_mask(const struct radv_image *image, enum radv_queue_family family,
enum radv_queue_family queue_family)
{
if (!image->exclusive)
}
VKAPI_ATTR VkResult VKAPI_CALL
-radv_CreateImage(VkDevice _device, const VkImageCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator, VkImage *pImage)
+radv_CreateImage(VkDevice _device, const VkImageCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator,
+ VkImage *pImage)
{
#ifdef ANDROID
- const VkNativeBufferANDROID *gralloc_info =
- vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
+ const VkNativeBufferANDROID *gralloc_info = vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
if (gralloc_info)
return radv_image_from_gralloc(_device, pCreateInfo, gralloc_info, pAllocator, pImage);
const VkImageSwapchainCreateInfoKHR *swapchain_info =
vk_find_struct_const(pCreateInfo->pNext, IMAGE_SWAPCHAIN_CREATE_INFO_KHR);
if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) {
- return wsi_common_create_swapchain_image(device->physical_device->vk.wsi_device,
- pCreateInfo,
- swapchain_info->swapchain,
- pImage);
+ return wsi_common_create_swapchain_image(device->physical_device->vk.wsi_device, pCreateInfo,
+ swapchain_info->swapchain, pImage);
}
#endif
- const struct wsi_image_create_info *wsi_info =
- vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
+ const struct wsi_image_create_info *wsi_info = vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
bool scanout = wsi_info && wsi_info->scanout;
bool prime_blit_src = wsi_info && wsi_info->blit_src;
}
VKAPI_ATTR void VKAPI_CALL
-radv_GetImageSubresourceLayout(VkDevice _device, VkImage _image,
- const VkImageSubresource *pSubresource, VkSubresourceLayout *pLayout)
+radv_GetImageSubresourceLayout(VkDevice _device, VkImage _image, const VkImageSubresource *pSubresource,
+ VkSubresourceLayout *pLayout)
{
RADV_FROM_HANDLE(radv_image, image, _image);
RADV_FROM_HANDLE(radv_device, device, _device);
assert(level == 0);
assert(layer == 0);
- pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level,
- surface, mem_plane_id, 0);
- pLayout->rowPitch = ac_surface_get_plane_stride(device->physical_device->rad_info.gfx_level,
- surface, mem_plane_id, level);
+ pLayout->offset =
+ ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level, surface, mem_plane_id, 0);
+ pLayout->rowPitch =
+ ac_surface_get_plane_stride(device->physical_device->rad_info.gfx_level, surface, mem_plane_id, level);
pLayout->arrayPitch = 0;
pLayout->depthPitch = 0;
pLayout->size = ac_surface_get_plane_size(surface, mem_plane_id);
} else if (device->physical_device->rad_info.gfx_level >= GFX9) {
uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;
- pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level,
- &plane->surface, 0, layer) +
- level_offset;
- if (image->vk.format == VK_FORMAT_R32G32B32_UINT ||
- image->vk.format == VK_FORMAT_R32G32B32_SINT ||
+ pLayout->offset =
+ ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level, &plane->surface, 0, layer) +
+ level_offset;
+ if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT ||
image->vk.format == VK_FORMAT_R32G32B32_SFLOAT) {
/* Adjust the number of bytes between each row because
* the pitch is actually the number of components per
* row.
*/
pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
} else {
- uint32_t pitch =
- surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;
+ uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;
assert(util_is_power_of_two_nonzero(surface->bpe));
pLayout->rowPitch = pitch * surface->bpe;
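A worked example of the two rowPitch paths above, assuming bpe == 12 for the 96-bit formats and a surf_pitch counted in 4-byte components (both are assumptions for illustration, not values taken from the source):

unsigned surf_pitch = 300, bpe = 12;
unsigned rgb32_row_bytes = surf_pitch * bpe / 3; /* 300 components * 4 bytes = 1200 */
unsigned plain_row_bytes = surf_pitch * 4;       /* ordinary path: pitch * bpe      */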
RADV_FROM_HANDLE(radv_device, device, _device);
struct radv_image_view *view;
- view =
- vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (view == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
view->bo = buffer->bo;
view->range = vk_buffer_range(&buffer->vk, pCreateInfo->offset, pCreateInfo->range);
- radv_make_texel_buffer_descriptor(device, va, pCreateInfo->format, pCreateInfo->offset,
- view->range, view->state);
+ radv_make_texel_buffer_descriptor(device, va, pCreateInfo->format, pCreateInfo->offset, view->range, view->state);
}
void
RADV_FROM_HANDLE(radv_device, device, _device);
struct radv_buffer_view *view;
- view =
- vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!view)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
VKAPI_ATTR void VKAPI_CALL
-radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
- const VkAllocationCallbacks *pAllocator)
+radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView, const VkAllocationCallbacks *pAllocator)
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);
#include "vk_instance.h"
#include "vk_util.h"
-static const struct debug_control radv_debug_options[] = {
- {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
- {"nodcc", RADV_DEBUG_NO_DCC},
- {"shaders", RADV_DEBUG_DUMP_SHADERS},
- {"nocache", RADV_DEBUG_NO_CACHE},
- {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
- {"nohiz", RADV_DEBUG_NO_HIZ},
- {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
- {"allbos", RADV_DEBUG_ALL_BOS},
- {"noibs", RADV_DEBUG_NO_IBS},
- {"spirv", RADV_DEBUG_DUMP_SPIRV},
- {"vmfaults", RADV_DEBUG_VM_FAULTS},
- {"zerovram", RADV_DEBUG_ZERO_VRAM},
- {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
- {"preoptir", RADV_DEBUG_PREOPTIR},
- {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
- {"info", RADV_DEBUG_INFO},
- {"startup", RADV_DEBUG_STARTUP},
- {"checkir", RADV_DEBUG_CHECKIR},
- {"nobinning", RADV_DEBUG_NOBINNING},
- {"nongg", RADV_DEBUG_NO_NGG},
- {"metashaders", RADV_DEBUG_DUMP_META_SHADERS},
- {"nomemorycache", RADV_DEBUG_NO_MEMORY_CACHE},
- {"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE},
- {"llvm", RADV_DEBUG_LLVM},
- {"forcecompress", RADV_DEBUG_FORCE_COMPRESS},
- {"hang", RADV_DEBUG_HANG},
- {"img", RADV_DEBUG_IMG},
- {"noumr", RADV_DEBUG_NO_UMR},
- {"invariantgeom", RADV_DEBUG_INVARIANT_GEOM},
- {"splitfma", RADV_DEBUG_SPLIT_FMA},
- {"nodisplaydcc", RADV_DEBUG_NO_DISPLAY_DCC},
- {"notccompatcmask", RADV_DEBUG_NO_TC_COMPAT_CMASK},
- {"novrsflatshading", RADV_DEBUG_NO_VRS_FLAT_SHADING},
- {"noatocdithering", RADV_DEBUG_NO_ATOC_DITHERING},
- {"nonggc", RADV_DEBUG_NO_NGGC},
- {"prologs", RADV_DEBUG_DUMP_PROLOGS},
- {"nodma", RADV_DEBUG_NO_DMA_BLIT},
- {"epilogs", RADV_DEBUG_DUMP_EPILOGS},
- {"nofmask", RADV_DEBUG_NO_FMASK},
- {"shadowregs", RADV_DEBUG_SHADOW_REGS},
- {"extra_md", RADV_DEBUG_EXTRA_MD},
- {"nogpl", RADV_DEBUG_NO_GPL},
- {"videoarraypath", RADV_DEBUG_VIDEO_ARRAY_PATH},
- {"nort", RADV_DEBUG_NO_RT},
- {NULL, 0}};
+static const struct debug_control radv_debug_options[] = {{"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
+ {"nodcc", RADV_DEBUG_NO_DCC},
+ {"shaders", RADV_DEBUG_DUMP_SHADERS},
+ {"nocache", RADV_DEBUG_NO_CACHE},
+ {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
+ {"nohiz", RADV_DEBUG_NO_HIZ},
+ {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
+ {"allbos", RADV_DEBUG_ALL_BOS},
+ {"noibs", RADV_DEBUG_NO_IBS},
+ {"spirv", RADV_DEBUG_DUMP_SPIRV},
+ {"vmfaults", RADV_DEBUG_VM_FAULTS},
+ {"zerovram", RADV_DEBUG_ZERO_VRAM},
+ {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
+ {"preoptir", RADV_DEBUG_PREOPTIR},
+ {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
+ {"info", RADV_DEBUG_INFO},
+ {"startup", RADV_DEBUG_STARTUP},
+ {"checkir", RADV_DEBUG_CHECKIR},
+ {"nobinning", RADV_DEBUG_NOBINNING},
+ {"nongg", RADV_DEBUG_NO_NGG},
+ {"metashaders", RADV_DEBUG_DUMP_META_SHADERS},
+ {"nomemorycache", RADV_DEBUG_NO_MEMORY_CACHE},
+ {"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE},
+ {"llvm", RADV_DEBUG_LLVM},
+ {"forcecompress", RADV_DEBUG_FORCE_COMPRESS},
+ {"hang", RADV_DEBUG_HANG},
+ {"img", RADV_DEBUG_IMG},
+ {"noumr", RADV_DEBUG_NO_UMR},
+ {"invariantgeom", RADV_DEBUG_INVARIANT_GEOM},
+ {"splitfma", RADV_DEBUG_SPLIT_FMA},
+ {"nodisplaydcc", RADV_DEBUG_NO_DISPLAY_DCC},
+ {"notccompatcmask", RADV_DEBUG_NO_TC_COMPAT_CMASK},
+ {"novrsflatshading", RADV_DEBUG_NO_VRS_FLAT_SHADING},
+ {"noatocdithering", RADV_DEBUG_NO_ATOC_DITHERING},
+ {"nonggc", RADV_DEBUG_NO_NGGC},
+ {"prologs", RADV_DEBUG_DUMP_PROLOGS},
+ {"nodma", RADV_DEBUG_NO_DMA_BLIT},
+ {"epilogs", RADV_DEBUG_DUMP_EPILOGS},
+ {"nofmask", RADV_DEBUG_NO_FMASK},
+ {"shadowregs", RADV_DEBUG_SHADOW_REGS},
+ {"extra_md", RADV_DEBUG_EXTRA_MD},
+ {"nogpl", RADV_DEBUG_NO_GPL},
+ {"videoarraypath", RADV_DEBUG_VIDEO_ARRAY_PATH},
+ {"nort", RADV_DEBUG_NO_RT},
+ {NULL, 0}};
const char *
radv_get_debug_option_name(int id)
return radv_debug_options[id].string;
}
-static const struct debug_control radv_perftest_options[] = {
- {"localbos", RADV_PERFTEST_LOCAL_BOS},
- {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
- {"bolist", RADV_PERFTEST_BO_LIST},
- {"cswave32", RADV_PERFTEST_CS_WAVE_32},
- {"pswave32", RADV_PERFTEST_PS_WAVE_32},
- {"gewave32", RADV_PERFTEST_GE_WAVE_32},
- {"nosam", RADV_PERFTEST_NO_SAM},
- {"sam", RADV_PERFTEST_SAM},
- {"rt", RADV_PERFTEST_RT},
- {"nggc", RADV_PERFTEST_NGGC},
- {"emulate_rt", RADV_PERFTEST_EMULATE_RT},
- {"rtwave64", RADV_PERFTEST_RT_WAVE_64},
- {"ngg_streamout", RADV_PERFTEST_NGG_STREAMOUT},
- {"video_decode", RADV_PERFTEST_VIDEO_DECODE},
- {"dmashaders", RADV_PERFTEST_DMA_SHADERS},
- {NULL, 0}};
+static const struct debug_control radv_perftest_options[] = {{"localbos", RADV_PERFTEST_LOCAL_BOS},
+ {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
+ {"bolist", RADV_PERFTEST_BO_LIST},
+ {"cswave32", RADV_PERFTEST_CS_WAVE_32},
+ {"pswave32", RADV_PERFTEST_PS_WAVE_32},
+ {"gewave32", RADV_PERFTEST_GE_WAVE_32},
+ {"nosam", RADV_PERFTEST_NO_SAM},
+ {"sam", RADV_PERFTEST_SAM},
+ {"rt", RADV_PERFTEST_RT},
+ {"nggc", RADV_PERFTEST_NGGC},
+ {"emulate_rt", RADV_PERFTEST_EMULATE_RT},
+ {"rtwave64", RADV_PERFTEST_RT_WAVE_64},
+ {"ngg_streamout", RADV_PERFTEST_NGG_STREAMOUT},
+ {"video_decode", RADV_PERFTEST_VIDEO_DECODE},
+ {"dmashaders", RADV_PERFTEST_DMA_SHADERS},
+ {NULL, 0}};
const char *
radv_get_perftest_option_name(int id)
static void
radv_init_dri_options(struct radv_instance *instance)
{
- driParseOptionInfo(&instance->available_dri_options, radv_dri_options,
- ARRAY_SIZE(radv_dri_options));
- driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "radv", NULL,
- NULL, instance->vk.app_info.app_name, instance->vk.app_info.app_version,
+ driParseOptionInfo(&instance->available_dri_options, radv_dri_options, ARRAY_SIZE(radv_dri_options));
+ driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "radv", NULL, NULL,
+ instance->vk.app_info.app_name, instance->vk.app_info.app_version,
instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);
- instance->enable_mrt_output_nan_fixup =
- driQueryOptionb(&instance->dri_options, "radv_enable_mrt_output_nan_fixup");
+ instance->enable_mrt_output_nan_fixup = driQueryOptionb(&instance->dri_options, "radv_enable_mrt_output_nan_fixup");
- instance->disable_shrink_image_store =
- driQueryOptionb(&instance->dri_options, "radv_disable_shrink_image_store");
+ instance->disable_shrink_image_store = driQueryOptionb(&instance->dri_options, "radv_disable_shrink_image_store");
- instance->absolute_depth_bias =
- driQueryOptionb(&instance->dri_options, "radv_absolute_depth_bias");
+ instance->absolute_depth_bias = driQueryOptionb(&instance->dri_options, "radv_absolute_depth_bias");
instance->disable_tc_compat_htile_in_general =
driQueryOptionb(&instance->dri_options, "radv_disable_tc_compat_htile_general");
instance->zero_vram = driQueryOptionb(&instance->dri_options, "radv_zero_vram");
- instance->disable_aniso_single_level =
- driQueryOptionb(&instance->dri_options, "radv_disable_aniso_single_level");
+ instance->disable_aniso_single_level = driQueryOptionb(&instance->dri_options, "radv_disable_aniso_single_level");
instance->disable_sinking_load_input_fs =
driQueryOptionb(&instance->dri_options, "radv_disable_sinking_load_input_fs");
- instance->flush_before_query_copy =
- driQueryOptionb(&instance->dri_options, "radv_flush_before_query_copy");
+ instance->flush_before_query_copy = driQueryOptionb(&instance->dri_options, "radv_flush_before_query_copy");
- instance->enable_unified_heap_on_apu =
- driQueryOptionb(&instance->dri_options, "radv_enable_unified_heap_on_apu");
+ instance->enable_unified_heap_on_apu = driQueryOptionb(&instance->dri_options, "radv_enable_unified_heap_on_apu");
instance->tex_non_uniform = driQueryOptionb(&instance->dri_options, "radv_tex_non_uniform");
};
VKAPI_ATTR VkResult VKAPI_CALL
-radv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator, VkInstance *pInstance)
+radv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator,
+ VkInstance *pInstance)
{
struct radv_instance *instance;
VkResult result;
vk_instance_dispatch_table_from_entrypoints(&dispatch_table, &radv_instance_entrypoints, true);
vk_instance_dispatch_table_from_entrypoints(&dispatch_table, &wsi_instance_entrypoints, false);
- result = vk_instance_init(&instance->vk, &radv_instance_extensions_supported, &dispatch_table,
- pCreateInfo, pAllocator);
+ result =
+ vk_instance_init(&instance->vk, &radv_instance_extensions_supported, &dispatch_table, pCreateInfo, pAllocator);
if (result != VK_SUCCESS) {
vk_free(pAllocator, instance);
return vk_error(NULL, result);
if (pLayerName)
return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
- return vk_enumerate_instance_extension_properties(&radv_instance_extensions_supported,
- pPropertyCount, pProperties);
+ return vk_enumerate_instance_extension_properties(&radv_instance_extensions_supported, pPropertyCount, pProperties);
}
VKAPI_ATTR VkResult VKAPI_CALL
#include <list>
class radv_llvm_per_thread_info {
public:
- radv_llvm_per_thread_info(enum radeon_family arg_family,
- enum ac_target_machine_options arg_tm_options, unsigned arg_wave_size)
+ radv_llvm_per_thread_info(enum radeon_family arg_family, enum ac_target_machine_options arg_tm_options,
+ unsigned arg_wave_size)
: family(arg_family), tm_options(arg_tm_options), wave_size(arg_wave_size), passes(NULL)
{
}
return ac_compile_module_to_elf(passes, module, pelf_buffer, pelf_size);
}
- bool is_same(enum radeon_family arg_family, enum ac_target_machine_options arg_tm_options,
- unsigned arg_wave_size)
+ bool is_same(enum radeon_family arg_family, enum ac_target_machine_options arg_tm_options, unsigned arg_wave_size)
{
if (arg_family == family && arg_tm_options == tm_options && arg_wave_size == wave_size)
return true;
static thread_local std::list<radv_llvm_per_thread_info> radv_llvm_per_thread_list;
bool
-radv_compile_to_elf(struct ac_llvm_compiler *info, LLVMModuleRef module, char **pelf_buffer,
- size_t *pelf_size)
+radv_compile_to_elf(struct ac_llvm_compiler *info, LLVMModuleRef module, char **pelf_buffer, size_t *pelf_size)
{
radv_llvm_per_thread_info *thread_info = nullptr;
bool radv_init_llvm_compiler(struct ac_llvm_compiler *info, enum radeon_family family,
enum ac_target_machine_options tm_options, unsigned wave_size);
-bool radv_compile_to_elf(struct ac_llvm_compiler *info, LLVMModuleRef module, char **pelf_buffer,
- size_t *pelf_size);
+bool radv_compile_to_elf(struct ac_llvm_compiler *info, LLVMModuleRef module, char **pelf_buffer, size_t *pelf_size);
#ifdef __cplusplus
}
#include "radv_shader_args.h"
#include "ac_binary.h"
-#include "ac_nir.h"
#include "ac_llvm_build.h"
+#include "ac_nir.h"
#include "ac_nir_to_llvm.h"
#include "ac_shader_abi.h"
#include "ac_shader_util.h"
while (mask) {
int i = u_bit_scan(&mask);
- ctx->descriptor_sets[i] =
- ac_build_load_to_sgpr(&ctx->ac, desc_sets, LLVMConstInt(ctx->ac.i32, i, false));
+ ctx->descriptor_sets[i] = ac_build_load_to_sgpr(&ctx->ac, desc_sets, LLVMConstInt(ctx->ac.i32, i, false));
LLVMSetAlignment(ctx->descriptor_sets[i], 4);
}
} else {
}
}
- ctx->main_function =
- create_llvm_function(&ctx->ac, ctx->ac.module, ctx->ac.builder, &ctx->args->ac,
- get_llvm_calling_convention(ctx->main_function.value, stage),
- ctx->max_workgroup_size, ctx->options);
+ ctx->main_function = create_llvm_function(&ctx->ac, ctx->ac.module, ctx->ac.builder, &ctx->args->ac,
+ get_llvm_calling_convention(ctx->main_function.value, stage),
+ ctx->max_workgroup_size, ctx->options);
load_descriptor_sets(ctx);
- if (stage == MESA_SHADER_TESS_CTRL ||
- (stage == MESA_SHADER_VERTEX && ctx->shader_info->vs.as_ls) ||
+ if (stage == MESA_SHADER_TESS_CTRL || (stage == MESA_SHADER_VERTEX && ctx->shader_info->vs.as_ls) ||
ctx->shader_info->is_ngg ||
/* GFX9 has the ESGS ring buffer in LDS. */
(stage == MESA_SHADER_GEOMETRY && has_previous_stage)) {
}
static LLVMValueRef
-radv_get_sampler_desc(struct ac_shader_abi *abi, LLVMValueRef index,
- enum ac_descriptor_type desc_type)
+radv_get_sampler_desc(struct ac_shader_abi *abi, LLVMValueRef index, enum ac_descriptor_type desc_type)
{
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
* use the tail from plane 1 so that we can store only the first 16 bytes
* of the last plane. */
if (desc_type == AC_DESC_PLANE_2 && index && LLVMTypeOf(index) == ctx->ac.i32) {
- LLVMValueRef plane1_addr =
- LLVMBuildSub(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 32, false), "");
+ LLVMValueRef plane1_addr = LLVMBuildSub(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 32, false), "");
LLVMValueRef descriptor1 = radv_load_rsrc(ctx, plane1_addr, ctx->ac.v8i32);
LLVMValueRef descriptor2 = radv_load_rsrc(ctx, index, ctx->ac.v4i32);
}
static void
-scan_shader_output_decl(struct radv_shader_context *ctx, struct nir_variable *variable,
- struct nir_shader *shader, gl_shader_stage stage)
+scan_shader_output_decl(struct radv_shader_context *ctx, struct nir_variable *variable, struct nir_shader *shader,
+ gl_shader_stage stage)
{
int idx = variable->data.driver_location;
unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
prepare_gs_input_vgprs(struct radv_shader_context *ctx, bool merged)
{
if (merged) {
- ctx->gs_wave_id =
- ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 16, 8);
+ ctx->gs_wave_id = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 16, 8);
} else {
ctx->gs_wave_id = ac_get_arg(&ctx->ac, ctx->args->ac.gs_wave_id);
}
{
assert(!LLVMGetNamedGlobal(ctx->ac.module, "esgs_ring"));
- LLVMValueRef esgs_ring = LLVMAddGlobalInAddressSpace(ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0),
- "esgs_ring", AC_ADDR_SPACE_LDS);
+ LLVMValueRef esgs_ring =
+ LLVMAddGlobalInAddressSpace(ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0), "esgs_ring", AC_ADDR_SPACE_LDS);
LLVMSetLinkage(esgs_ring, LLVMExternalLinkage);
LLVMSetAlignment(esgs_ring, 64 * 1024);
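/* A note on the idiom above: a zero-length i32 array with external linkage
 * is the usual LLVM way to declare an extern LDS symbol of unknown size,
 * and the 64 KiB alignment (presumably the full LDS aperture) pins the
 * ring to LDS offset 0 so ES/GS addressing can use it as the LDS base. */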
}
-static LLVMValueRef radv_intrinsic_load(struct ac_shader_abi *abi, nir_intrinsic_instr *intrin)
+static LLVMValueRef
+radv_intrinsic_load(struct ac_shader_abi *abi, nir_intrinsic_instr *intrin)
{
switch (intrin->intrinsic) {
case nir_intrinsic_load_base_vertex:
}
static LLVMModuleRef
-ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
- const struct radv_nir_compiler_options *options,
- const struct radv_shader_info *info,
- struct nir_shader *const *shaders, int shader_count,
+ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, const struct radv_nir_compiler_options *options,
+ const struct radv_shader_info *info, struct nir_shader *const *shaders, int shader_count,
const struct radv_shader_args *args)
{
struct radv_shader_context ctx = {0};
exports_color_null = !exports_mrtz || (shaders[0]->info.outputs_written & (0xffu << FRAG_RESULT_DATA0));
}
- ac_llvm_context_init(&ctx.ac, ac_llvm, options->info,
- float_mode, info->wave_size, info->ballot_bit_size, exports_color_null, exports_mrtz);
+ ac_llvm_context_init(&ctx.ac, ac_llvm, options->info, float_mode, info->wave_size, info->ballot_bit_size,
+ exports_color_null, exports_mrtz);
uint32_t length = 1;
for (uint32_t i = 0; i < shader_count; i++)
if (args->ac.instance_id.used)
ctx.abi.instance_id = ac_get_arg(&ctx.ac, args->ac.instance_id);
- if (options->info->has_ls_vgpr_init_bug &&
- shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
+ if (options->info->has_ls_vgpr_init_bug && shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
ac_fixup_ls_hs_input_vgprs(&ctx.ac, &ctx.abi, &args->ac);
if (is_ngg) {
ac_build_s_barrier(&ctx.ac, shaders[shader_idx]->info.stage);
}
- nir_foreach_shader_out_variable(variable, shaders[shader_idx]) scan_shader_output_decl(
- &ctx, variable, shaders[shader_idx], shaders[shader_idx]->info.stage);
+ nir_foreach_shader_out_variable (variable, shaders[shader_idx])
+ scan_shader_output_decl(&ctx, variable, shaders[shader_idx], shaders[shader_idx]->info.stage);
bool check_merged_wave_info = shader_count >= 2 && !(is_ngg && shader_idx == 1);
LLVMBasicBlockRef merge_block = NULL;
LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
merge_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
- LLVMValueRef count = ac_unpack_param(
- &ctx.ac, ac_get_arg(&ctx.ac, args->ac.merged_wave_info), 8 * shader_idx, 8);
+ LLVMValueRef count =
+ ac_unpack_param(&ctx.ac, ac_get_arg(&ctx.ac, args->ac.merged_wave_info), 8 * shader_idx, 8);
LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac);
LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT, thread_id, count, "");
LLVMBuildCondBr(ctx.ac.builder, cond, then_block, merge_block);
LLVMBuildRetVoid(ctx.ac.builder);
if (options->dump_preoptir) {
- fprintf(stderr, "%s LLVM IR:\n\n",
- radv_get_shader_name(info, shaders[shader_count - 1]->info.stage));
+ fprintf(stderr, "%s LLVM IR:\n\n", radv_get_shader_name(info, shaders[shader_count - 1]->info.stage));
ac_dump_module(ctx.ac.module);
fprintf(stderr, "\n");
}
}
static unsigned
-radv_llvm_compile(LLVMModuleRef M, char **pelf_buffer, size_t *pelf_size,
- struct ac_llvm_compiler *ac_llvm)
+radv_llvm_compile(LLVMModuleRef M, char **pelf_buffer, size_t *pelf_size, struct ac_llvm_compiler *ac_llvm)
{
unsigned retval = 0;
LLVMContextRef llvm_ctx;
}
static void
-ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, LLVMModuleRef llvm_module,
- struct radv_shader_binary **rbinary, const char *name,
- const struct radv_nir_compiler_options *options)
+ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, LLVMModuleRef llvm_module, struct radv_shader_binary **rbinary,
+ const char *name, const struct radv_nir_compiler_options *options)
{
char *elf_buffer = NULL;
size_t elf_size = 0;
}
static void
-radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm,
- const struct radv_nir_compiler_options *options,
- const struct radv_shader_info *info,
- struct radv_shader_binary **rbinary,
- const struct radv_shader_args *args, struct nir_shader *const *nir,
- int nir_count)
+radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, const struct radv_nir_compiler_options *options,
+ const struct radv_shader_info *info, struct radv_shader_binary **rbinary,
+ const struct radv_shader_args *args, struct nir_shader *const *nir, int nir_count)
{
LLVMModuleRef llvm_module;
llvm_module = ac_translate_nir_to_llvm(ac_llvm, options, info, nir, nir_count, args);
- ac_compile_llvm_module(ac_llvm, llvm_module, rbinary,
- radv_get_shader_name(info, nir[nir_count - 1]->info.stage), options);
+ ac_compile_llvm_module(ac_llvm, llvm_module, rbinary, radv_get_shader_name(info, nir[nir_count - 1]->info.stage),
+ options);
}
void
-llvm_compile_shader(const struct radv_nir_compiler_options *options,
- const struct radv_shader_info *info, unsigned shader_count,
- struct nir_shader *const *shaders, struct radv_shader_binary **binary,
+llvm_compile_shader(const struct radv_nir_compiler_options *options, const struct radv_shader_info *info,
+ unsigned shader_count, struct nir_shader *const *shaders, struct radv_shader_binary **binary,
const struct radv_shader_args *args)
{
enum ac_target_machine_options tm_options = 0;
}
static void
-radv_emit_windowed_counters(struct radv_device *device, struct radeon_cmdbuf *cs, int family,
- bool enable)
+radv_emit_windowed_counters(struct radv_device *device, struct radeon_cmdbuf *cs, int family, bool enable)
{
if (family == RADV_QUEUE_GENERAL) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(enable ? V_028A90_PERFCOUNTER_START : V_028A90_PERFCOUNTER_STOP) |
- EVENT_INDEX(0));
+ radeon_emit(cs, EVENT_TYPE(enable ? V_028A90_PERFCOUNTER_START : V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0));
}
radeon_set_sh_reg(cs, R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(enable));
radv_perfcounter_emit_spm_reset(struct radeon_cmdbuf *cs)
{
radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
- S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
- S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_DISABLE_AND_RESET));
+ S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
+ S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_DISABLE_AND_RESET));
}
void
{
/* Start SPM counters. */
radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
- S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
- S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_START_COUNTING));
+ S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
+ S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_START_COUNTING));
radv_emit_windowed_counters(device, cs, family, true);
}
/* Stop SPM counters. */
radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
- S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
- S_036020_SPM_PERFMON_STATE(device->physical_device->rad_info.never_stop_sq_perf_counters ?
- V_036020_STRM_PERFMON_STATE_START_COUNTING :
- V_036020_STRM_PERFMON_STATE_STOP_COUNTING));
+ S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
+ S_036020_SPM_PERFMON_STATE(device->physical_device->rad_info.never_stop_sq_perf_counters
+ ? V_036020_STRM_PERFMON_STATE_START_COUNTING
+ : V_036020_STRM_PERFMON_STATE_STOP_COUNTING));
}
enum radv_perfcounter_op {
enum radv_perfcounter_uuid uuid;
};
-#define PC_DESC(arg_op, arg_unit, arg_name, arg_category, arg_description, arg_uuid, ...) \
- (struct radv_perfcounter_desc) \
- { \
- .impl = {.op = arg_op, .regs = {__VA_ARGS__}}, \
- .unit = VK_PERFORMANCE_COUNTER_UNIT_##arg_unit##_KHR, .name = arg_name, \
- .category = arg_category, .description = arg_description, .uuid = RADV_PC_UUID_##arg_uuid \
+#define PC_DESC(arg_op, arg_unit, arg_name, arg_category, arg_description, arg_uuid, ...) \
+ (struct radv_perfcounter_desc) \
+ { \
+ .impl = {.op = arg_op, .regs = {__VA_ARGS__}}, .unit = VK_PERFORMANCE_COUNTER_UNIT_##arg_unit##_KHR, \
+ .name = arg_name, .category = arg_category, .description = arg_description, .uuid = RADV_PC_UUID_##arg_uuid \
}
-#define ADD_PC(op, unit, name, category, description, uuid, ...) \
- do { \
- if (descs) { \
- descs[*count] = PC_DESC((op), unit, name, category, description, uuid, __VA_ARGS__); \
- } \
- ++*count; \
+#define ADD_PC(op, unit, name, category, description, uuid, ...) \
+ do { \
+ if (descs) { \
+ descs[*count] = PC_DESC((op), unit, name, category, description, uuid, __VA_ARGS__); \
+ } \
+ ++*count; \
} while (0)
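/* ADD_PC follows the usual two-pass enumeration pattern: a first call with
 * descs == NULL only advances *count so the caller can size the array, and
 * a second call with storage fills in the descriptors. */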
#define CTR(block, ctr) (S_REG_BLOCK(block) | S_REG_SEL(ctr))
#define CONSTANT(v) (S_REG_CONSTANT(1) | (uint32_t)(v))
TCP_PERF_SEL_REQ_MISS_GFX10 = CTR(TCP, 0x12),
};
-#define CTR_NUM_SIMD \
- CONSTANT(pdev->rad_info.num_simd_per_compute_unit * pdev->rad_info.num_cu)
-#define CTR_NUM_CUS CONSTANT(pdev->rad_info.num_cu)
+#define CTR_NUM_SIMD CONSTANT(pdev->rad_info.num_simd_per_compute_unit * pdev->rad_info.num_cu)
+#define CTR_NUM_CUS CONSTANT(pdev->rad_info.num_cu)
static void
-radv_query_perfcounter_descs(struct radv_physical_device *pdev, uint32_t *count,
- struct radv_perfcounter_desc *descs)
+radv_query_perfcounter_descs(struct radv_physical_device *pdev, uint32_t *count, struct radv_perfcounter_desc *descs)
{
*count = 0;
- ADD_PC(RADV_PC_OP_MAX, CYCLES, "GPU active cycles", "GRBM",
- "cycles the GPU is active processing a command buffer.", GPU_CYCLES,
- GRBM_PERF_SEL_GUI_ACTIVE);
-
- ADD_PC(RADV_PC_OP_SUM, GENERIC, "Waves", "Shaders", "Number of waves executed", SHADER_WAVES,
- SQ_PERF_SEL_WAVES);
- ADD_PC(RADV_PC_OP_SUM, GENERIC, "Instructions", "Shaders", "Number of Instructions executed",
- SHADER_INSTRUCTIONS, SQ_PERF_SEL_INSTS_ALL_GFX10);
- ADD_PC(RADV_PC_OP_SUM, GENERIC, "VALU Instructions", "Shaders",
- "Number of VALU Instructions executed", SHADER_INSTRUCTIONS_VALU,
- SQ_PERF_SEL_INSTS_VALU_GFX10);
- ADD_PC(RADV_PC_OP_SUM, GENERIC, "SALU Instructions", "Shaders",
- "Number of SALU Instructions executed", SHADER_INSTRUCTIONS_SALU,
- SQ_PERF_SEL_INSTS_SALU_GFX10);
- ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Load Instructions", "Shaders",
- "Number of VMEM load instructions executed", SHADER_INSTRUCTIONS_VMEM_LOAD,
- SQ_PERF_SEL_INSTS_TEX_LOAD_GFX10);
- ADD_PC(RADV_PC_OP_SUM, GENERIC, "SMEM Load Instructions", "Shaders",
- "Number of SMEM load instructions executed", SHADER_INSTRUCTIONS_SMEM_LOAD,
- SQ_PERF_SEL_INSTS_SMEM_GFX10);
- ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Store Instructions", "Shaders",
- "Number of VMEM store instructions executed", SHADER_INSTRUCTIONS_VMEM_STORE,
- SQ_PERF_SEL_INSTS_TEX_STORE_GFX10);
- ADD_PC(RADV_PC_OP_SUM, GENERIC, "LDS Instructions", "Shaders",
- "Number of LDS Instructions executed", SHADER_INSTRUCTIONS_LDS,
- SQ_PERF_SEL_INSTS_LDS_GFX10);
- ADD_PC(RADV_PC_OP_SUM, GENERIC, "GDS Instructions", "Shaders",
- "Number of GDS Instructions executed", SHADER_INSTRUCTIONS_GDS,
- SQ_PERF_SEL_INSTS_GDS_GFX10);
+ ADD_PC(RADV_PC_OP_MAX, CYCLES, "GPU active cycles", "GRBM", "cycles the GPU is active processing a command buffer.",
+ GPU_CYCLES, GRBM_PERF_SEL_GUI_ACTIVE);
+
+ ADD_PC(RADV_PC_OP_SUM, GENERIC, "Waves", "Shaders", "Number of waves executed", SHADER_WAVES, SQ_PERF_SEL_WAVES);
+ ADD_PC(RADV_PC_OP_SUM, GENERIC, "Instructions", "Shaders", "Number of Instructions executed", SHADER_INSTRUCTIONS,
+ SQ_PERF_SEL_INSTS_ALL_GFX10);
+ ADD_PC(RADV_PC_OP_SUM, GENERIC, "VALU Instructions", "Shaders", "Number of VALU Instructions executed",
+ SHADER_INSTRUCTIONS_VALU, SQ_PERF_SEL_INSTS_VALU_GFX10);
+ ADD_PC(RADV_PC_OP_SUM, GENERIC, "SALU Instructions", "Shaders", "Number of SALU Instructions executed",
+ SHADER_INSTRUCTIONS_SALU, SQ_PERF_SEL_INSTS_SALU_GFX10);
+ ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Load Instructions", "Shaders", "Number of VMEM load instructions executed",
+ SHADER_INSTRUCTIONS_VMEM_LOAD, SQ_PERF_SEL_INSTS_TEX_LOAD_GFX10);
+ ADD_PC(RADV_PC_OP_SUM, GENERIC, "SMEM Load Instructions", "Shaders", "Number of SMEM load instructions executed",
+ SHADER_INSTRUCTIONS_SMEM_LOAD, SQ_PERF_SEL_INSTS_SMEM_GFX10);
+ ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Store Instructions", "Shaders", "Number of VMEM store instructions executed",
+ SHADER_INSTRUCTIONS_VMEM_STORE, SQ_PERF_SEL_INSTS_TEX_STORE_GFX10);
+ ADD_PC(RADV_PC_OP_SUM, GENERIC, "LDS Instructions", "Shaders", "Number of LDS Instructions executed",
+ SHADER_INSTRUCTIONS_LDS, SQ_PERF_SEL_INSTS_LDS_GFX10);
+ ADD_PC(RADV_PC_OP_SUM, GENERIC, "GDS Instructions", "Shaders", "Number of GDS Instructions executed",
+ SHADER_INSTRUCTIONS_GDS, SQ_PERF_SEL_INSTS_GDS_GFX10);
ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "VALU Busy", "Shader Utilization",
- "Percentage of time the VALU units are busy", SHADER_VALU_BUSY,
- SQ_PERF_SEL_INST_CYCLES_VALU_GFX10, CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_SIMD);
+ "Percentage of time the VALU units are busy", SHADER_VALU_BUSY, SQ_PERF_SEL_INST_CYCLES_VALU_GFX10,
+ CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_SIMD);
ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "SALU Busy", "Shader Utilization",
- "Percentage of time the SALU units are busy", SHADER_SALU_BUSY,
- SQ_PERF_SEL_INSTS_SALU_GFX10, CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_CUS);
+ "Percentage of time the SALU units are busy", SHADER_SALU_BUSY, SQ_PERF_SEL_INSTS_SALU_GFX10,
+ CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_CUS);
if (pdev->rad_info.gfx_level >= GFX10_3) {
- ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM read size", "Memory",
- "Number of bytes read from VRAM", VRAM_READ_SIZE, GL2C_PERF_SEL_EA_RDREQ_32B_GFX103,
- CONSTANT(32), GL2C_PERF_SEL_EA_RDREQ_64B_GFX103, CONSTANT(64),
- GL2C_PERF_SEL_EA_RDREQ_96B_GFX103, CONSTANT(96), GL2C_PERF_SEL_EA_RDREQ_128B_GFX103,
+ ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM read size", "Memory", "Number of bytes read from VRAM",
+ VRAM_READ_SIZE, GL2C_PERF_SEL_EA_RDREQ_32B_GFX103, CONSTANT(32), GL2C_PERF_SEL_EA_RDREQ_64B_GFX103,
+ CONSTANT(64), GL2C_PERF_SEL_EA_RDREQ_96B_GFX103, CONSTANT(96), GL2C_PERF_SEL_EA_RDREQ_128B_GFX103,
CONSTANT(128));
- ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM write size", "Memory",
- "Number of bytes written to VRAM", VRAM_WRITE_SIZE, GL2C_PERF_SEL_MC_WRREQ_GFX103,
- CONSTANT(32), GL2C_PERF_SEL_EA_WRREQ_64B_GFX103, CONSTANT(64), CONSTANT(0),
- CONSTANT(0), CONSTANT(0), CONSTANT(0));
+ ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM write size", "Memory", "Number of bytes written to VRAM",
+ VRAM_WRITE_SIZE, GL2C_PERF_SEL_MC_WRREQ_GFX103, CONSTANT(32), GL2C_PERF_SEL_EA_WRREQ_64B_GFX103,
+ CONSTANT(64), CONSTANT(0), CONSTANT(0), CONSTANT(0), CONSTANT(0));
} else {
- ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM read size", "Memory",
- "Number of bytes read from VRAM", VRAM_READ_SIZE, GL2C_PERF_SEL_EA_RDREQ_32B_GFX101,
- CONSTANT(32), GL2C_PERF_SEL_EA_RDREQ_64B_GFX101, CONSTANT(64),
- GL2C_PERF_SEL_EA_RDREQ_96B_GFX101, CONSTANT(96), GL2C_PERF_SEL_EA_RDREQ_128B_GFX101,
+ ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM read size", "Memory", "Number of bytes read from VRAM",
+ VRAM_READ_SIZE, GL2C_PERF_SEL_EA_RDREQ_32B_GFX101, CONSTANT(32), GL2C_PERF_SEL_EA_RDREQ_64B_GFX101,
+ CONSTANT(64), GL2C_PERF_SEL_EA_RDREQ_96B_GFX101, CONSTANT(96), GL2C_PERF_SEL_EA_RDREQ_128B_GFX101,
CONSTANT(128));
- ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM write size", "Memory",
- "Number of bytes written to VRAM", VRAM_WRITE_SIZE, GL2C_PERF_SEL_MC_WRREQ_GFX101,
- CONSTANT(32), GL2C_PERF_SEL_EA_WRREQ_64B_GFX101, CONSTANT(32), CONSTANT(0),
- CONSTANT(0), CONSTANT(0), CONSTANT(0));
+ ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM write size", "Memory", "Number of bytes written to VRAM",
+ VRAM_WRITE_SIZE, GL2C_PERF_SEL_MC_WRREQ_GFX101, CONSTANT(32), GL2C_PERF_SEL_EA_WRREQ_64B_GFX101,
+ CONSTANT(32), CONSTANT(0), CONSTANT(0), CONSTANT(0), CONSTANT(0));
}
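/* On the GFX10.1 path above, a 64-byte write appears to be counted once by
 * MC_WRREQ (weight 32) and once more by EA_WRREQ_64B (another 32), summing
 * to 64 bytes; the trailing CONSTANT(0) pairs are the unused slots of the
 * four-term weighted sum. This is our reading of the counter semantics,
 * not a documented guarantee. */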
- ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L0 cache hit ratio", "Memory", "Hit ratio of L0 cache",
- L0_CACHE_HIT_RATIO, TCP_PERF_SEL_REQ_MISS_GFX10, TCP_PERF_SEL_REQ_GFX10);
- ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L1 cache hit ratio", "Memory", "Hit ratio of L1 cache",
- L1_CACHE_HIT_RATIO, GL1C_PERF_SEL_REQ_MISS, GL1C_PERF_SEL_REQ);
+ ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L0 cache hit ratio", "Memory", "Hit ratio of L0 cache", L0_CACHE_HIT_RATIO,
+ TCP_PERF_SEL_REQ_MISS_GFX10, TCP_PERF_SEL_REQ_GFX10);
+ ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L1 cache hit ratio", "Memory", "Hit ratio of L1 cache", L1_CACHE_HIT_RATIO,
+ GL1C_PERF_SEL_REQ_MISS, GL1C_PERF_SEL_REQ);
if (pdev->rad_info.gfx_level >= GFX10_3) {
- ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L2 cache hit ratio", "Memory",
- "Hit ratio of L2 cache", L2_CACHE_HIT_RATIO, GL2C_PERF_SEL_MISS_GFX103,
- GL2C_PERF_SEL_REQ);
+ ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L2 cache hit ratio", "Memory", "Hit ratio of L2 cache",
+ L2_CACHE_HIT_RATIO, GL2C_PERF_SEL_MISS_GFX103, GL2C_PERF_SEL_REQ);
} else {
- ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L2 cache hit ratio", "Memory",
- "Hit ratio of L2 cache", L2_CACHE_HIT_RATIO, GL2C_PERF_SEL_MISS_GFX101,
- GL2C_PERF_SEL_REQ);
+ ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L2 cache hit ratio", "Memory", "Hit ratio of L2 cache",
+ L2_CACHE_HIT_RATIO, GL2C_PERF_SEL_MISS_GFX101, GL2C_PERF_SEL_REQ);
}
}
}
static VkResult
-radv_get_counter_registers(const struct radv_physical_device *pdevice, uint32_t num_indices,
- const uint32_t *indices, unsigned *out_num_regs, uint32_t **out_regs)
+radv_get_counter_registers(const struct radv_physical_device *pdevice, uint32_t num_indices, const uint32_t *indices,
+ unsigned *out_num_regs, uint32_t **out_regs)
{
ASSERTED uint32_t num_counters = pdevice->num_perfcounters;
const struct radv_perfcounter_desc *descs = pdevice->perfcounters;
for (unsigned i = 0; i < num_indices; ++i) {
uint32_t index = indices[i];
assert(index < num_counters);
- for (unsigned j = 0; j < ARRAY_SIZE(descs[index].impl.regs) && descs[index].impl.regs[j];
- ++j) {
+ for (unsigned j = 0; j < ARRAY_SIZE(descs[index].impl.regs) && descs[index].impl.regs[j]; ++j) {
if (!G_REG_CONSTANT(descs[index].impl.regs[j]))
regs[reg_cnt++] = descs[index].impl.regs[j];
}
static unsigned
radv_pc_get_num_instances(const struct radv_physical_device *pdevice, struct ac_pc_block *ac_block)
{
- return ac_block->num_instances *
- ((ac_block->b->b->flags & AC_PC_BLOCK_SE) ? pdevice->rad_info.max_se : 1);
+ return ac_block->num_instances * ((ac_block->b->b->flags & AC_PC_BLOCK_SE) ? pdevice->rad_info.max_se : 1);
}
static unsigned
-radv_get_num_counter_passes(const struct radv_physical_device *pdevice, unsigned num_regs,
- const uint32_t *regs)
+radv_get_num_counter_passes(const struct radv_physical_device *pdevice, unsigned num_regs, const uint32_t *regs)
{
enum ac_pc_gpu_block prev_block = NUM_GPU_BLOCK;
unsigned block_reg_count = 0;
++block_reg_count;
- passes_needed =
- MAX2(passes_needed, DIV_ROUND_UP(block_reg_count, ac_block->b->b->num_counters));
+ passes_needed = MAX2(passes_needed, DIV_ROUND_UP(block_reg_count, ac_block->b->b->num_counters));
}
return passes_needed;
}
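/* Hypothetical example: selecting 6 counters from a block whose hardware
 * exposes 4 counter slots needs DIV_ROUND_UP(6, 4) = 2 passes, and the
 * pool-wide pass count is the maximum over all referenced blocks. */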
VkResult
-radv_pc_init_query_pool(struct radv_physical_device *pdevice,
- const VkQueryPoolCreateInfo *pCreateInfo, struct radv_pc_query_pool *pool)
+radv_pc_init_query_pool(struct radv_physical_device *pdevice, const VkQueryPoolCreateInfo *pCreateInfo,
+ struct radv_pc_query_pool *pool)
{
const VkQueryPoolPerformanceCreateInfoKHR *perf_info =
vk_find_struct_const(pCreateInfo->pNext, QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR);
if (!radv_init_perfcounter_descs(pdevice))
return VK_ERROR_OUT_OF_HOST_MEMORY;
- result =
- radv_get_counter_registers(pdevice, perf_info->counterIndexCount, perf_info->pCounterIndices,
- &pool->num_pc_regs, &pool->pc_regs);
+ result = radv_get_counter_registers(pdevice, perf_info->counterIndexCount, perf_info->pCounterIndices,
+ &pool->num_pc_regs, &pool->pc_regs);
if (result != VK_SUCCESS)
return result;
}
static void
-radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count,
- unsigned *selectors)
+radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, unsigned *selectors)
{
struct ac_pc_block_base *regs = block->b->b;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
return;
for (idx = 0; idx < count; ++idx) {
- radeon_set_perfctr_reg(cmd_buffer, regs->select0[idx],
- G_REG_SEL(selectors[idx]) | regs->select_or);
+ radeon_set_perfctr_reg(cmd_buffer, regs->select0[idx], G_REG_SEL(selectors[idx]) | regs->select_or);
}
for (idx = 0; idx < regs->num_spm_counters; idx++) {
}
static void
-radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block,
- unsigned count, uint64_t va)
+radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count,
+ uint64_t va)
{
struct ac_pc_block_base *regs = block->b->b;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
reg = regs->counters[idx];
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_TC_L2) |
- COPY_DATA_WR_CONFIRM | COPY_DATA_COUNT_SEL); /* 64 bits */
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_TC_L2) | COPY_DATA_WR_CONFIRM |
+ COPY_DATA_COUNT_SEL); /* 64 bits */
radeon_emit(cs, reg >> 2);
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
- va += sizeof(uint64_t) * 2 *
- radv_pc_get_num_instances(cmd_buffer->device->physical_device, block);
+ va += sizeof(uint64_t) * 2 * radv_pc_get_num_instances(cmd_buffer->device->physical_device, block);
reg += reg_delta;
}
}
static void
-radv_pc_sample_block(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count,
- uint64_t va)
+radv_pc_sample_block(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, uint64_t va)
{
unsigned se_end = 1;
if (block->b->b->flags & AC_PC_BLOCK_SE)
}
static void
-radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool,
- uint64_t va, bool end)
+radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va, bool end)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_physical_device *pdevice = cmd_buffer->device->physical_device;
radv_emit_instance(cmd_buffer, -1, -1);
radv_emit_windowed_counters(cmd_buffer->device, cs, cmd_buffer->qf, false);
- radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
- S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) |
- S_036020_PERFMON_SAMPLE_ENABLE(1));
+ radeon_set_uconfig_reg(
+ cs, R_036020_CP_PERFMON_CNTL,
+ S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) | S_036020_PERFMON_SAMPLE_ENABLE(1));
for (unsigned pass = 0; pass < pool->num_passes; ++pass) {
- uint64_t pred_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) +
- PERF_CTR_BO_PASS_OFFSET + 8 * pass;
+ uint64_t pred_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_PASS_OFFSET + 8 * pass;
uint64_t reg_va = va + (end ? 8 : 0);
radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
if (end) {
uint64_t signal_va = va + pool->b.stride - 8 - 8 * pass;
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
- radeon_emit(cs,
- S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
radeon_emit(cs, signal_va);
radeon_emit(cs, signal_va >> 32);
radeon_emit(cs, 1); /* value */
}
void
-radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool,
- uint64_t va)
+radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_physical_device *pdevice = cmd_buffer->device->physical_device;
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pool->b.bo);
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->device->perf_counter_bo);
- uint64_t perf_ctr_va =
- radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET;
+ uint64_t perf_ctr_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET;
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
radeon_emit(cs, perf_ctr_va);
radv_perfcounter_emit_shaders(cs, 0x7f);
for (unsigned pass = 0; pass < pool->num_passes; ++pass) {
- uint64_t pred_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) +
- PERF_CTR_BO_PASS_OFFSET + 8 * pass;
+ uint64_t pred_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_PASS_OFFSET + 8 * pass;
radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
radeon_emit(cs, pred_va);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
ASSERTED unsigned cdw_max;
- cdw_max =
- radeon_check_space(cmd_buffer->device->ws, cs,
- 256 + /* Reserved for things that don't scale with passes/counters */
- 5 * pool->num_passes + /* COND_EXECs */
- pool->b.stride / 8 * 8);
+ cdw_max = radeon_check_space(cmd_buffer->device->ws, cs,
+ 256 + /* Reserved for things that don't scale with passes/counters */
+ 5 * pool->num_passes + /* COND_EXECs */
+ pool->b.stride / 8 * 8);
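/* Rough budget, assuming the packet sizes sketched in the comments above:
 * 5 dwords per COND_EXEC per pass, plus 8 dwords of read-back packets per
 * 8-byte slot of the query stride (e.g. a 6-dword PKT3_COPY_DATA plus
 * slack). Hypothetical numbers: 2 passes and a 512-byte stride reserve
 * 256 + 10 + 512 = 778 dwords. */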
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pool->b.bo);
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->device->perf_counter_bo);
- uint64_t perf_ctr_va =
- radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET;
+ uint64_t perf_ctr_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET;
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
- radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0,
- EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, perf_ctr_va, 1,
- cmd_buffer->gfx9_fence_va);
+ radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
+ EOP_DATA_SEL_VALUE_32BIT, perf_ctr_va, 1, cmd_buffer->gfx9_fence_va);
radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, perf_ctr_va, 1, 0xffffffff);
radv_pc_wait_idle(cmd_buffer);
result.float64 = radv_pc_sum_reg(impl->regs[0], data);
break;
case RADV_PC_OP_RATIO_DIVSCALE:
- result.float64 = radv_pc_sum_reg(impl->regs[0], data) /
- (double)radv_pc_sum_reg(impl->regs[1], data) /
+ result.float64 = radv_pc_sum_reg(impl->regs[0], data) / (double)radv_pc_sum_reg(impl->regs[1], data) /
radv_pc_sum_reg(impl->regs[2], data) * 100.0;
break;
case RADV_PC_OP_REVERSE_RATIO: {
case RADV_PC_OP_SUM_WEIGHTED_4:
result.float64 = 0.0;
for (unsigned i = 0; i < 4; ++i)
- result.float64 +=
- radv_pc_sum_reg(impl->regs[2 * i], data) * radv_pc_sum_reg(impl->regs[2 * i + 1], data);
+ result.float64 += radv_pc_sum_reg(impl->regs[2 * i], data) * radv_pc_sum_reg(impl->regs[2 * i + 1], data);
break;
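/* Worked example with hypothetical counts, using the "VRAM read size"
 * description above: 10 reads of 32B, 5 of 64B, 0 of 96B and 2 of 128B
 * evaluate to 10*32 + 5*64 + 0*96 + 2*128 = 896 bytes. */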
default:
unreachable("unhandled performance counter operation");
pCounters[i].storage = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR;
memset(&pCounters[i].uuid, 0, sizeof(pCounters[i].uuid));
- strcpy((char*)&pCounters[i].uuid, "RADV");
+ strcpy((char *)&pCounters[i].uuid, "RADV");
const uint32_t uuid = descs[i].uuid;
memcpy(&pCounters[i].uuid[12], &uuid, sizeof(uuid));
if (pCounterDescriptions) {
pCounterDescriptions[i].sType = VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_DESCRIPTION_KHR;
- pCounterDescriptions[i].flags =
- VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_BIT_KHR;
+ pCounterDescriptions[i].flags = VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_BIT_KHR;
strcpy(pCounterDescriptions[i].name, descs[i].name);
strcpy(pCounterDescriptions[i].category, descs[i].category);
strcpy(pCounterDescriptions[i].description, descs[i].description);
VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
- VkPhysicalDevice physicalDevice,
- const VkQueryPoolPerformanceCreateInfoKHR *pPerformanceQueryCreateInfo, uint32_t *pNumPasses)
+ VkPhysicalDevice physicalDevice, const VkQueryPoolPerformanceCreateInfoKHR *pPerformanceQueryCreateInfo,
+ uint32_t *pNumPasses)
{
RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
return;
}
- assert(vk_queue_to_radv(pdevice, pPerformanceQueryCreateInfo->queueFamilyIndex) ==
- RADV_QUEUE_GENERAL);
+ assert(vk_queue_to_radv(pdevice, pPerformanceQueryCreateInfo->queueFamilyIndex) == RADV_QUEUE_GENERAL);
unsigned num_regs = 0;
uint32_t *regs = NULL;
- VkResult result =
- radv_get_counter_registers(pdevice, pPerformanceQueryCreateInfo->counterIndexCount,
- pPerformanceQueryCreateInfo->pCounterIndices, &num_regs, &regs);
+ VkResult result = radv_get_counter_registers(pdevice, pPerformanceQueryCreateInfo->counterIndexCount,
+ pPerformanceQueryCreateInfo->pCounterIndices, &num_regs, &regs);
if (result != VK_SUCCESS) {
/* Can't return an error, so log */
fprintf(stderr, "radv: Failed to allocate memory for perf counters\n");
#include "drm-uapi/amdgpu_drm.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#endif
-#include "git_sha1.h"
#include "winsys/null/radv_null_winsys_public.h"
+#include "git_sha1.h"
#ifdef LLVM_AVAILABLE
#include "ac_llvm_util.h"
radv_taskmesh_enabled(const struct radv_physical_device *pdevice)
{
return pdevice->use_ngg && !pdevice->use_llvm && pdevice->rad_info.gfx_level >= GFX10_3 &&
- !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE) &&
- pdevice->rad_info.has_gang_submit;
+ !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE) && pdevice->rad_info.has_gang_submit;
}
static bool
radv_vrs_attachment_enabled(const struct radv_physical_device *pdevice)
{
- return pdevice->rad_info.gfx_level >= GFX11 ||
- !(pdevice->instance->debug_flags & RADV_DEBUG_NO_HIZ);
+ return pdevice->rad_info.gfx_level >= GFX11 || !(pdevice->instance->debug_flags & RADV_DEBUG_NO_HIZ);
}
static bool
radv_NV_device_generated_commands_enabled(const struct radv_physical_device *device)
{
- return device->rad_info.gfx_level >= GFX7 &&
- !(device->instance->debug_flags & RADV_DEBUG_NO_IBS) &&
+ return device->rad_info.gfx_level >= GFX7 && !(device->instance->debug_flags & RADV_DEBUG_NO_IBS) &&
driQueryOptionb(&device->instance->dri_options, "radv_dgc");
}
out[i] = 0;
for (unsigned i = 0; i < 2 * length; ++i) {
- unsigned v =
- in[i] <= '9' ? in[i] - '0' : (in[i] >= 'a' ? (in[i] - 'a' + 10) : (in[i] - 'A' + 10));
+ unsigned v = in[i] <= '9' ? in[i] - '0' : (in[i] >= 'a' ? (in[i] - 'a' + 10) : (in[i] - 'A' + 10));
out[i / 2] |= v << (4 * (1 - i % 2));
}
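/* Example: parsing "a3" gives v = 10 on i = 0 (shifted into the high
 * nibble) and v = 3 on i = 1 (low nibble), so out[0] = 0xa3; the
 * conditional accepts both lower- and upper-case hex digits. */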
}
#endif
#ifdef LLVM_AVAILABLE
- if (pdevice->use_llvm &&
- !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
+ if (pdevice->use_llvm && !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
return -1;
#endif
static uint64_t
radv_get_visible_vram_size(struct radv_physical_device *device)
{
- return MIN2(radv_get_adjusted_vram_size(device),
- (uint64_t)device->rad_info.vram_vis_size_kb * 1024);
+ return MIN2(radv_get_adjusted_vram_size(device), (uint64_t)device->rad_info.vram_vis_size_kb * 1024);
}
static uint64_t
device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
device->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS;
device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
- .propertyFlags =
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+ .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
.heapIndex = gart_index,
};
}
device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
- .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
.heapIndex = visible_vram_index,
};
device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_32BIT;
device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
- .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
.heapIndex = visible_vram_index,
};
device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
- .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+ .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+ VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
.heapIndex = gart_index,
};
device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_32BIT;
device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
- .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+ .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+ VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
.heapIndex = gart_index,
};
}
for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
VkMemoryType mem_type = device->memory_properties.memoryTypes[i];
- if (((mem_type.propertyFlags &
- (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
+ if (((mem_type.propertyFlags & (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
!(device->memory_flags[i] & RADEON_FLAG_32BIT)) {
- VkMemoryPropertyFlags property_flags = mem_type.propertyFlags |
- VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
+ VkMemoryPropertyFlags property_flags = mem_type.propertyFlags | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;
device->memory_domains[type_count] = device->memory_domains[i];
}
static void
-radv_get_binning_settings(const struct radv_physical_device *pdevice,
- struct radv_binning_settings *settings)
+radv_get_binning_settings(const struct radv_physical_device *pdevice, struct radv_binning_settings *settings)
{
settings->context_states_per_bin = 1;
settings->persistent_states_per_bin = 1;
.KHR_variable_pointers = true,
.KHR_video_queue = !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE),
.KHR_video_decode_queue = !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE),
- .KHR_video_decode_h264 =
- VIDEO_CODEC_H264DEC && !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE),
- .KHR_video_decode_h265 =
- VIDEO_CODEC_H265DEC && !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE),
+ .KHR_video_decode_h264 = VIDEO_CODEC_H264DEC && !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE),
+ .KHR_video_decode_h265 = VIDEO_CODEC_H265DEC && !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE),
.KHR_vulkan_memory_model = true,
.KHR_workgroup_memory_explicit_layout = true,
.KHR_zero_initialize_workgroup_memory = true,
.EXT_border_color_swizzle = device->rad_info.gfx_level >= GFX10,
.EXT_buffer_device_address = true,
.EXT_calibrated_timestamps = RADV_SUPPORT_CALIBRATED_TIMESTAMPS &&
- !(device->rad_info.family == CHIP_RAVEN ||
- device->rad_info.family == CHIP_RAVEN2),
+ !(device->rad_info.family == CHIP_RAVEN || device->rad_info.family == CHIP_RAVEN2),
.EXT_color_write_enable = true,
.EXT_conditional_rendering = true,
.EXT_conservative_rasterization = device->rad_info.gfx_level >= GFX9,
.EXT_external_memory_host = device->rad_info.has_userptr,
.EXT_global_priority = true,
.EXT_global_priority_query = true,
- .EXT_graphics_pipeline_library =
- !device->use_llvm && !(device->instance->debug_flags & RADV_DEBUG_NO_GPL),
+ .EXT_graphics_pipeline_library = !device->use_llvm && !(device->instance->debug_flags & RADV_DEBUG_NO_GPL),
.EXT_host_query_reset = true,
.EXT_image_2d_view_of_3d = true,
.EXT_image_drm_format_modifier = device->rad_info.gfx_level >= GFX9,
.EXT_tooling_info = true,
.EXT_transform_feedback = true,
.EXT_vertex_attribute_divisor = true,
- .EXT_vertex_input_dynamic_state =
- !device->use_llvm && !radv_NV_device_generated_commands_enabled(device),
+ .EXT_vertex_input_dynamic_state = !device->use_llvm && !radv_NV_device_generated_commands_enabled(device),
.EXT_ycbcr_image_arrays = true,
.AMD_buffer_marker = true,
.AMD_device_coherent_memory = true,
* using it.
*/
.VALVE_descriptor_set_host_mapping =
- device->vk.instance->app_info.engine_name &&
- strcmp(device->vk.instance->app_info.engine_name, "vkd3d") == 0,
+ device->vk.instance->app_info.engine_name && strcmp(device->vk.instance->app_info.engine_name, "vkd3d") == 0,
.VALVE_mutable_descriptor_type = true,
};
}
static void
-radv_physical_device_get_features(const struct radv_physical_device *pdevice,
- struct vk_features *features)
+radv_physical_device_get_features(const struct radv_physical_device *pdevice, struct vk_features *features)
{
bool taskmesh_en = radv_taskmesh_enabled(pdevice);
bool has_perf_query = radv_perf_query_supported(pdevice);
- bool has_shader_image_float_minmax = pdevice->rad_info.gfx_level != GFX8 &&
- pdevice->rad_info.gfx_level != GFX9 &&
+ bool has_shader_image_float_minmax = pdevice->rad_info.gfx_level != GFX8 && pdevice->rad_info.gfx_level != GFX9 &&
pdevice->rad_info.gfx_level != GFX11;
*features = (struct vk_features){
.shaderBufferFloat16Atomics = false,
.shaderBufferFloat16AtomicAdd = false,
.shaderBufferFloat16AtomicMinMax = false,
- .shaderBufferFloat32AtomicMinMax =
- radv_has_shader_buffer_float_minmax(pdevice, 32),
- .shaderBufferFloat64AtomicMinMax =
- radv_has_shader_buffer_float_minmax(pdevice, 64),
+ .shaderBufferFloat32AtomicMinMax = radv_has_shader_buffer_float_minmax(pdevice, 32),
+ .shaderBufferFloat64AtomicMinMax = radv_has_shader_buffer_float_minmax(pdevice, 64),
.shaderSharedFloat16Atomics = false,
.shaderSharedFloat16AtomicAdd = false,
.shaderSharedFloat16AtomicMinMax = false,
* both. This limit is for the pipeline layout, not for the set layout, but
* there is no set limit, so we just set a pipeline limit. I don't think
* any app is going to hit this soon. */
- return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
- MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
+ return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS - MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
(32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
32 /* storage buffer, 32 due to potential space wasted on alignment */ +
- 32 /* sampler, largest when combined with image */ + 64 /* sampled image */ +
- 64 /* storage image */);
+ 32 /* sampler, largest when combined with image */ + 64 /* sampled image */ + 64 /* storage image */);
}
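/* Rough arithmetic: the divisor is 32 + 32 + 32 + 64 + 64 = 224 bytes per
 * "one of each" descriptor, so the limit works out to roughly
 * (2^31 - reserved) / 224, on the order of 9.5 million descriptors per
 * pipeline layout. */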
static uint32_t
}
VKAPI_ATTR void VKAPI_CALL
-radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
- VkPhysicalDeviceProperties *pProperties)
+radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties *pProperties)
{
RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
VkSampleCountFlags sample_counts = 0xf;
}
static void
-radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice,
- VkPhysicalDeviceVulkan11Properties *p)
+radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice, VkPhysicalDeviceVulkan11Properties *p)
{
assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);
if (radv_enable_rt(pdevice, true))
p->subgroupSupportedStages |= RADV_RT_STAGE_BITS;
- p->subgroupSupportedOperations =
- VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
- VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
- VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
- VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
+ p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
+ VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
+ VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
+ VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
p->subgroupQuadOperationsInAllStages = true;
p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
}
static void
-radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice,
- VkPhysicalDeviceVulkan12Properties *p)
+radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice, VkPhysicalDeviceVulkan12Properties *p)
{
assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);
p->shaderRoundingModeRTZFloat32 = !pdevice->use_llvm;
p->shaderSignedZeroInfNanPreserveFloat32 = true;
- p->shaderDenormFlushToZeroFloat16 =
- pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
+ p->shaderDenormFlushToZeroFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
p->shaderDenormPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
p->shaderRoundingModeRTEFloat16 = pdevice->rad_info.has_packed_math_16bit;
p->shaderRoundingModeRTZFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
p->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
/* We support all of the depth resolve modes */
- p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_AVERAGE_BIT |
- VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT;
+ p->supportedDepthResolveModes =
+ VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_AVERAGE_BIT | VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT;
/* Average doesn't make sense for stencil so we don't support that */
p->supportedStencilResolveModes =
}
static void
-radv_get_physical_device_properties_1_3(struct radv_physical_device *pdevice,
- VkPhysicalDeviceVulkan13Properties *p)
+radv_get_physical_device_properties_1_3(struct radv_physical_device *pdevice, VkPhysicalDeviceVulkan13Properties *p)
{
assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES);
p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
p->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
- p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
- MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
+ p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
p->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
p->maxInlineUniformTotalSize = UINT16_MAX;
p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = accel && gfx11plus;
p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = accel;
p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = accel;
- p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated =
- accel && gfx11plus;
+ p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = accel && gfx11plus;
p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = accel && !gfx11plus;
p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = accel && !gfx11plus;
p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
}
VKAPI_ATTR void VKAPI_CALL
-radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
- VkPhysicalDeviceProperties2 *pProperties)
+radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties2 *pProperties)
{
RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
};
radv_get_physical_device_properties_1_3(pdevice, &core_1_3);
- vk_foreach_struct(ext, pProperties->pNext)
- {
+ vk_foreach_struct (ext, pProperties->pNext) {
if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
continue;
if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
switch (ext->sType) {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
- VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
- (VkPhysicalDevicePushDescriptorPropertiesKHR *)ext;
+ VkPhysicalDevicePushDescriptorPropertiesKHR *properties = (VkPhysicalDevicePushDescriptorPropertiesKHR *)ext;
properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
break;
}
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
- VkPhysicalDeviceShaderCorePropertiesAMD *properties =
- (VkPhysicalDeviceShaderCorePropertiesAMD *)ext;
+ VkPhysicalDeviceShaderCorePropertiesAMD *properties = (VkPhysicalDeviceShaderCorePropertiesAMD *)ext;
/* Shader engines. */
properties->shaderEngineCount = pdevice->rad_info.max_se;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: {
- VkPhysicalDeviceShaderCoreProperties2AMD *properties =
- (VkPhysicalDeviceShaderCoreProperties2AMD *)ext;
+ VkPhysicalDeviceShaderCoreProperties2AMD *properties = (VkPhysicalDeviceShaderCoreProperties2AMD *)ext;
properties->shaderCoreFeatures = 0;
properties->activeComputeUnitCount = pdevice->rad_info.num_cu;
}
#ifndef _WIN32
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
- VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
- (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
+ VkPhysicalDevicePCIBusInfoPropertiesEXT *properties = (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
properties->pciDomain = pdevice->bus_info.domain;
properties->pciBus = pdevice->bus_info.bus;
properties->pciDevice = pdevice->bus_info.dev;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
- VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
- (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
- properties->sampleLocationSampleCounts =
- VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
+ VkPhysicalDeviceSampleLocationsPropertiesEXT *properties = (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
+ properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
properties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
properties->sampleLocationCoordinateRange[0] = 0.0f;
properties->sampleLocationCoordinateRange[1] = 0.9375f;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
- VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
- (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
+ VkPhysicalDeviceLineRasterizationPropertiesEXT *props = (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
props->lineSubPixelPrecisionBits = 4;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
- VkPhysicalDeviceRobustness2PropertiesEXT *properties =
- (VkPhysicalDeviceRobustness2PropertiesEXT *)ext;
+ VkPhysicalDeviceRobustness2PropertiesEXT *properties = (VkPhysicalDeviceRobustness2PropertiesEXT *)ext;
properties->robustStorageBufferAccessSizeAlignment = 4;
properties->robustUniformBufferAccessSizeAlignment = 4;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
- VkPhysicalDeviceCustomBorderColorPropertiesEXT *props =
- (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
+ VkPhysicalDeviceCustomBorderColorPropertiesEXT *props = (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
props->maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT;
break;
}
props->maxFragmentSizeAspectRatio = 2;
props->maxFragmentShadingRateCoverageSamples = 32;
props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_8_BIT;
- props->fragmentShadingRateWithShaderDepthStencilWrites =
- !pdevice->rad_info.has_vrs_ds_export_bug;
+ props->fragmentShadingRateWithShaderDepthStencilWrites = !pdevice->rad_info.has_vrs_ds_export_bug;
props->fragmentShadingRateWithSampleMask = true;
props->fragmentShadingRateWithShaderSampleMask = false;
props->fragmentShadingRateWithConservativeRasterization = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
- VkPhysicalDeviceProvokingVertexPropertiesEXT *props =
- (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
+ VkPhysicalDeviceProvokingVertexPropertiesEXT *props = (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
props->provokingVertexModePerPipeline = true;
props->transformFeedbackPreservesTriangleFanProvokingVertex = true;
break;
pProperties->properties.limits.maxPerStageDescriptorStorageBuffers;
props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures =
pProperties->properties.limits.maxPerStageDescriptorStorageBuffers;
- props->maxDescriptorSetAccelerationStructures =
- pProperties->properties.limits.maxDescriptorSetStorageBuffers;
+ props->maxDescriptorSetAccelerationStructures = pProperties->properties.limits.maxDescriptorSetStorageBuffers;
props->maxDescriptorSetUpdateAfterBindAccelerationStructures =
pProperties->properties.limits.maxDescriptorSetStorageBuffers;
props->minAccelerationStructureScratchOffsetAlignment = 128;
}
#endif
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: {
- VkPhysicalDeviceMultiDrawPropertiesEXT *props =
- (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext;
+ VkPhysicalDeviceMultiDrawPropertiesEXT *props = (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext;
props->maxMultiDrawCount = 2048;
break;
}
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES: {
- VkPhysicalDeviceMaintenance4Properties *properties =
- (VkPhysicalDeviceMaintenance4Properties *)ext;
+ VkPhysicalDeviceMaintenance4Properties *properties = (VkPhysicalDeviceMaintenance4Properties *)ext;
properties->maxBufferSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
break;
}
(VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *)ext;
STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
- memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
- vk_shaderModuleIdentifierAlgorithmUUID,
+ memcpy(properties->shaderModuleIdentifierAlgorithmUUID, vk_shaderModuleIdentifierAlgorithmUUID,
sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
break;
}
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_EXT: {
- VkPhysicalDeviceMeshShaderPropertiesEXT *properties =
- (VkPhysicalDeviceMeshShaderPropertiesEXT *)ext;
+ VkPhysicalDeviceMeshShaderPropertiesEXT *properties = (VkPhysicalDeviceMeshShaderPropertiesEXT *)ext;
properties->maxTaskWorkGroupTotalCount = 4194304; /* 2^22 min required */
properties->maxTaskWorkGroupCount[0] = 65535;
properties->maxMeshOutputMemorySize = 32 * 1024; /* 32K min required */
properties->maxMeshSharedMemorySize = 28672; /* 28K min required */
properties->maxMeshPayloadAndSharedMemorySize =
- properties->maxTaskPayloadSize +
- properties->maxMeshSharedMemorySize; /* 28K min required */
+ properties->maxTaskPayloadSize + properties->maxMeshSharedMemorySize; /* 28K min required */
properties->maxMeshPayloadAndOutputMemorySize =
- properties->maxTaskPayloadSize +
- properties->maxMeshOutputMemorySize; /* 47K min required */
- properties->maxMeshOutputComponents = 128; /* 32x vec4 min required */
+ properties->maxTaskPayloadSize + properties->maxMeshOutputMemorySize; /* 47K min required */
+ properties->maxMeshOutputComponents = 128; /* 32x vec4 min required */
properties->maxMeshOutputVertices = 256;
properties->maxMeshOutputPrimitives = 256;
properties->maxMeshOutputLayers = 8;
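/* Example with spec-floor numbers (illustrative): a 16 KiB maxTaskPayloadSize plus the
 * 32 KiB maxMeshOutputMemorySize above yields 48 KiB (49152 bytes), which clears the
 * 47 KiB (48128 byte) minimum required for maxMeshPayloadAndOutputMemorySize. */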
fd = open(path, O_RDWR | O_CLOEXEC);
if (fd < 0) {
- return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, "Could not open device %s: %m",
- path);
+ return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, "Could not open device %s: %m", path);
}
version = drmGetVersion(fd);
}
#endif
- struct radv_physical_device *device = vk_zalloc2(&instance->vk.alloc, NULL, sizeof(*device), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+ struct radv_physical_device *device =
+ vk_zalloc2(&instance->vk.alloc, NULL, sizeof(*device), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
if (!device) {
result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail_fd;
}
struct vk_physical_device_dispatch_table dispatch_table;
- vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,
- &radv_physical_device_entrypoints, true);
- vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,
- &wsi_physical_device_entrypoints, false);
+ vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_physical_device_entrypoints, true);
+ vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, &wsi_physical_device_entrypoints, false);
result = vk_physical_device_init(&device->vk, &instance->vk, NULL, NULL, &dispatch_table);
if (result != VK_SUCCESS) {
if (drm_device) {
bool reserve_vmid = radv_sqtt_enabled();
- device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags,
- reserve_vmid);
+ device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags, reserve_vmid);
} else {
device->ws = radv_null_winsys_create();
}
.return_size = sizeof(accel_working),
.query = AMDGPU_INFO_ACCEL_WORKING};
- if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)) <
- 0 ||
+ if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)) < 0 ||
!accel_working) {
close(master_fd);
master_fd = -1;
#ifdef ANDROID
device->emulate_etc2 = !radv_device_supports_etc(device);
#else
- device->emulate_etc2 = !radv_device_supports_etc(device) &&
- driQueryOptionb(&device->instance->dri_options, "radv_require_etc2");
+ device->emulate_etc2 =
+ !radv_device_supports_etc(device) && driQueryOptionb(&device->instance->dri_options, "radv_require_etc2");
#endif
snprintf(device->name, sizeof(device->name), "AMD RADV %s%s", device->rad_info.name,
const char *marketing_name = device->ws->get_chip_name(device->ws);
snprintf(device->marketing_name, sizeof(device->marketing_name), "%s (RADV %s%s)",
- marketing_name ? marketing_name : "AMD Unknown", device->rad_info.name,
- radv_get_compiler_string(device));
+ marketing_name ? marketing_name : "AMD Unknown", device->rad_info.name, radv_get_compiler_string(device));
if (radv_device_get_cache_uuid(device, device->cache_uuid)) {
result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "cannot generate UUID");
device->dcc_msaa_allowed = (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);
- device->use_fmask =
- device->rad_info.gfx_level < GFX11 && !(device->instance->debug_flags & RADV_DEBUG_NO_FMASK);
+ device->use_fmask = device->rad_info.gfx_level < GFX11 && !(device->instance->debug_flags & RADV_DEBUG_NO_FMASK);
- device->use_ngg =
- (device->rad_info.gfx_level >= GFX10 && device->rad_info.family != CHIP_NAVI14 &&
- !(device->instance->debug_flags & RADV_DEBUG_NO_NGG)) ||
- device->rad_info.gfx_level >= GFX11;
+ device->use_ngg = (device->rad_info.gfx_level >= GFX10 && device->rad_info.family != CHIP_NAVI14 &&
+ !(device->instance->debug_flags & RADV_DEBUG_NO_NGG)) ||
+ device->rad_info.gfx_level >= GFX11;
/* TODO: Investigate if NGG culling helps on GFX11. */
- device->use_ngg_culling = device->use_ngg && device->rad_info.max_render_backends > 1 &&
- (device->rad_info.gfx_level == GFX10_3 ||
- (device->instance->perftest_flags & RADV_PERFTEST_NGGC)) &&
- !(device->instance->debug_flags & RADV_DEBUG_NO_NGGC);
+ device->use_ngg_culling =
+ device->use_ngg && device->rad_info.max_render_backends > 1 &&
+ (device->rad_info.gfx_level == GFX10_3 || (device->instance->perftest_flags & RADV_PERFTEST_NGGC)) &&
+ !(device->instance->debug_flags & RADV_DEBUG_NO_NGGC);
- device->use_ngg_streamout =
- device->use_ngg && (device->rad_info.gfx_level >= GFX11 ||
- (device->instance->perftest_flags & RADV_PERFTEST_NGG_STREAMOUT));
+ device->use_ngg_streamout = device->use_ngg && (device->rad_info.gfx_level >= GFX11 ||
+ (device->instance->perftest_flags & RADV_PERFTEST_NGG_STREAMOUT));
- device->emulate_ngg_gs_query_pipeline_stat =
- device->use_ngg && device->rad_info.gfx_level < GFX11;
+ device->emulate_ngg_gs_query_pipeline_stat = device->use_ngg && device->rad_info.gfx_level < GFX11;
/* Determine the number of threads per wave for all stages. */
device->cs_wave_size = 64;
* dependence wave32 would likely be a net loss (as well as the SALU count becoming more
* problematic)
*/
- if (!(device->instance->perftest_flags & RADV_PERFTEST_RT_WAVE_64) &&
- !(device->instance->force_rt_wave64) && device->rad_info.gfx_level < GFX11)
+ if (!(device->instance->perftest_flags & RADV_PERFTEST_RT_WAVE_64) && !(device->instance->force_rt_wave64) &&
+ device->rad_info.gfx_level < GFX11)
device->rt_wave_size = 32;
}
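/* The same condition, restated as a standalone predicate for readability (a sketch that
 * mirrors the logic above; no such helper exists in RADV): */
static bool
use_rt_wave32(const struct radv_physical_device *device)
{
   return !(device->instance->perftest_flags & RADV_PERFTEST_RT_WAVE_64) &&
          !device->instance->force_rt_wave64 && device->rad_info.gfx_level < GFX11;
}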
if ((drm_device->available_nodes & (1 << DRM_NODE_PRIMARY)) &&
stat(drm_device->nodes[DRM_NODE_PRIMARY], &primary_stat) != 0) {
- result =
- vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
- "failed to stat DRM primary node %s", drm_device->nodes[DRM_NODE_PRIMARY]);
+ result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to stat DRM primary node %s",
+ drm_device->nodes[DRM_NODE_PRIMARY]);
goto fail_perfcounters;
}
device->primary_devid = primary_stat.st_rdev;
if ((drm_device->available_nodes & (1 << DRM_NODE_RENDER)) &&
stat(drm_device->nodes[DRM_NODE_RENDER], &render_stat) != 0) {
- result =
- vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to stat DRM render node %s",
- drm_device->nodes[DRM_NODE_RENDER]);
+ result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to stat DRM render node %s",
+ drm_device->nodes[DRM_NODE_RENDER]);
goto fail_perfcounters;
}
device->render_devid = render_stat.st_rdev;
goto fail_perfcounters;
}
- device->gs_table_depth =
- ac_get_gs_table_depth(device->rad_info.gfx_level, device->rad_info.family);
+ device->gs_table_depth = ac_get_gs_table_depth(device->rad_info.gfx_level, device->rad_info.family);
ac_get_hs_info(&device->rad_info, &device->hs);
ac_get_task_info(&device->rad_info, &device->task_info);
}
VkResult
-create_drm_physical_device(struct vk_instance *vk_instance, struct _drmDevice *device,
- struct vk_physical_device **out)
+create_drm_physical_device(struct vk_instance *vk_instance, struct _drmDevice *device, struct vk_physical_device **out)
{
#ifndef _WIN32
if (!(device->available_nodes & (1 << DRM_NODE_RENDER)) || device->bustype != DRM_BUS_PCI ||
}
static void
-radv_get_physical_device_queue_family_properties(struct radv_physical_device *pdevice,
- uint32_t *pCount,
+radv_get_physical_device_queue_family_properties(struct radv_physical_device *pdevice, uint32_t *pCount,
VkQueueFamilyProperties **pQueueFamilyProperties)
{
int num_queue_families = 1;
idx = 0;
if (*pCount >= 1) {
*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
- .queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT |
- VK_QUEUE_SPARSE_BINDING_BIT,
+ .queueFlags =
+ VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT,
.queueCount = 1,
.timestampValidBits = 64,
.minImageTransferGranularity = (VkExtent3D){1, 1, 1},
!(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
if (*pCount > idx) {
*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
- .queueFlags =
- VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT,
+ .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT,
.queueCount = pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues,
.timestampValidBits = 64,
.minImageTransferGranularity = (VkExtent3D){1, 1, 1},
assert(*pCount <= 3);
for (uint32_t i = 0; i < *pCount; i++) {
- vk_foreach_struct(ext, pQueueFamilyProperties[i].pNext)
- {
+ vk_foreach_struct (ext, pQueueFamilyProperties[i].pNext) {
switch (ext->sType) {
case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
- VkQueueFamilyGlobalPriorityPropertiesKHR *prop =
- (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;
- STATIC_ASSERT(ARRAY_SIZE(radv_global_queue_priorities) <=
- VK_MAX_GLOBAL_PRIORITY_SIZE_KHR);
+ VkQueueFamilyGlobalPriorityPropertiesKHR *prop = (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;
+ STATIC_ASSERT(ARRAY_SIZE(radv_global_queue_priorities) <= VK_MAX_GLOBAL_PRIORITY_SIZE_KHR);
prop->priorityCount = ARRAY_SIZE(radv_global_queue_priorities);
- memcpy(&prop->priorities, radv_global_queue_priorities,
- sizeof(radv_global_queue_priorities));
+ memcpy(&prop->priorities, radv_global_queue_priorities, sizeof(radv_global_queue_priorities));
break;
}
case VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR: {
- VkQueueFamilyQueryResultStatusPropertiesKHR *prop =
- (VkQueueFamilyQueryResultStatusPropertiesKHR *)ext;
+ VkQueueFamilyQueryResultStatusPropertiesKHR *prop = (VkQueueFamilyQueryResultStatusPropertiesKHR *)ext;
prop->queryResultStatusSupport = VK_FALSE;
break;
}
case VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR: {
VkQueueFamilyVideoPropertiesKHR *prop = (VkQueueFamilyVideoPropertiesKHR *)ext;
- if (pQueueFamilyProperties[i].queueFamilyProperties.queueFlags &
- VK_QUEUE_VIDEO_DECODE_BIT_KHR)
- prop->videoCodecOperations = VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR |
- VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR;
+ if (pQueueFamilyProperties[i].queueFamilyProperties.queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR)
+ prop->videoCodecOperations =
+ VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR;
break;
}
default:
uint64_t total_heap_size = device->memory_properties.memoryHeaps[vram_vis_heap_idx].size;
/* Get the different memory usages. */
- uint64_t vram_vis_internal_usage =
- device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS) +
- device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
+ uint64_t vram_vis_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS) +
+ device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
uint64_t gtt_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);
uint64_t total_internal_usage = vram_vis_internal_usage + gtt_internal_usage;
uint64_t total_system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) +
/* Get the visible VRAM/GTT heap sizes and internal usages. */
uint64_t gtt_heap_size = device->memory_properties.memoryHeaps[gtt_heap_idx].size;
- uint64_t vram_vis_heap_size =
- device->memory_properties.memoryHeaps[vram_vis_heap_idx].size;
+ uint64_t vram_vis_heap_size = device->memory_properties.memoryHeaps[vram_vis_heap_idx].size;
- uint64_t vram_vis_internal_usage =
- device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS) +
- device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
+ uint64_t vram_vis_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS) +
+ device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
uint64_t gtt_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);
/* Compute the total heap size, internal and system usage. */
uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage);
/* Compute the remaining visible VRAM size for this process. */
- uint64_t vram_vis_free_space =
- vram_vis_heap_size - MIN2(vram_vis_heap_size, vram_vis_internal_usage);
+ uint64_t vram_vis_free_space = vram_vis_heap_size - MIN2(vram_vis_heap_size, vram_vis_internal_usage);
/* Distribute the total free space (2/3 as VRAM and 1/3 as GTT) to match the heap
* sizes, and align down to the page size to be conservative.
*/
- vram_vis_free_space = ROUND_DOWN_TO(MIN2((total_free_space * 2) / 3, vram_vis_free_space),
- device->rad_info.gart_page_size);
+ vram_vis_free_space =
+ ROUND_DOWN_TO(MIN2((total_free_space * 2) / 3, vram_vis_free_space), device->rad_info.gart_page_size);
uint64_t gtt_free_space = total_free_space - vram_vis_free_space;
- memoryBudget->heapBudget[vram_vis_heap_idx] =
- vram_vis_free_space + vram_vis_internal_usage;
+ memoryBudget->heapBudget[vram_vis_heap_idx] = vram_vis_free_space + vram_vis_internal_usage;
memoryBudget->heapUsage[vram_vis_heap_idx] = vram_vis_internal_usage;
memoryBudget->heapBudget[gtt_heap_idx] = gtt_free_space + gtt_internal_usage;
memoryBudget->heapUsage[gtt_heap_idx] = gtt_internal_usage;
};
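/* Self-contained sketch of the 2/3 VRAM / 1/3 GTT split above, with made-up sizes;
 * MIN2 and ROUND_DOWN_TO are rewritten locally to mirror the Mesa macros. */
#include <stdint.h>

#define MIN2(a, b)          ((a) < (b) ? (a) : (b))
#define ROUND_DOWN_TO(x, y) ((x) / (y) * (y))

static uint64_t
example_vram_vis_budget(void)
{
   const uint64_t page = 4096;
   const uint64_t total_free_space = 900ull << 20;    /* 900 MiB free overall */
   const uint64_t vram_vis_free_space = 512ull << 20; /* visible VRAM headroom */
   /* (900 * 2) / 3 = 600 MiB, clamped to the 512 MiB actually available. */
   return ROUND_DOWN_TO(MIN2((total_free_space * 2) / 3, vram_vis_free_space), page);
}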
VKAPI_ATTR VkResult VKAPI_CALL
-radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(VkPhysicalDevice physicalDevice,
- uint32_t *pTimeDomainCount,
+radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(VkPhysicalDevice physicalDevice, uint32_t *pTimeDomainCount,
VkTimeDomainEXT *pTimeDomains)
{
int d;
}
VKAPI_ATTR void VKAPI_CALL
-radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,
- VkSampleCountFlagBits samples,
+radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice, VkSampleCountFlagBits samples,
VkMultisamplePropertiesEXT *pMultisampleProperties)
{
- VkSampleCountFlagBits supported_samples =
- VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
+ VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
if (samples & supported_samples) {
pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
}
VKAPI_ATTR VkResult VKAPI_CALL
-radv_GetPhysicalDeviceFragmentShadingRatesKHR(
- VkPhysicalDevice physicalDevice, uint32_t *pFragmentShadingRateCount,
- VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
+radv_GetPhysicalDeviceFragmentShadingRatesKHR(VkPhysicalDevice physicalDevice, uint32_t *pFragmentShadingRateCount,
+ VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
{
VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out, pFragmentShadingRates,
pFragmentShadingRateCount);
-#define append_rate(w, h, s) \
- { \
- VkPhysicalDeviceFragmentShadingRateKHR rate = { \
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR, \
- .sampleCounts = s, \
- .fragmentSize = {.width = w, .height = h}, \
- }; \
- vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, r) *r = rate; \
+#define append_rate(w, h, s) \
+ { \
+ VkPhysicalDeviceFragmentShadingRateKHR rate = { \
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR, \
+ .sampleCounts = s, \
+ .fragmentSize = {.width = w, .height = h}, \
+ }; \
+ vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, r) *r = rate; \
}
for (uint32_t x = 2; x >= 1; x--) {
if (x == 1 && y == 1) {
samples = ~0;
} else {
- samples = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT |
- VK_SAMPLE_COUNT_8_BIT;
+ samples = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
}
append_rate(x, y, samples);
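/* App-side counterpart (hypothetical) of the enumeration pattern above:
 * VK_OUTARRAY_MAKE_TYPED / vk_outarray_append_typed implement the driver half of
 * Vulkan's two-call idiom, which a caller drives like this: */
#include <stdlib.h>
#include <vulkan/vulkan.h>

static VkQueueFamilyProperties *
app_query_queue_families(VkPhysicalDevice pdev, uint32_t *count)
{
   vkGetPhysicalDeviceQueueFamilyProperties(pdev, count, NULL); /* 1st call: get count */
   VkQueueFamilyProperties *props = calloc(*count, sizeof(*props));
   if (props)
      vkGetPhysicalDeviceQueueFamilyProperties(pdev, count, props); /* 2nd call: fill */
   return props;
}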
radv_pipeline_capture_shader_stats(const struct radv_device *device, VkPipelineCreateFlags flags)
{
return (flags & VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR) ||
- (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) ||
- device->keep_shader_info;
+ (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) || device->keep_shader_info;
}
void
-radv_pipeline_init(struct radv_device *device, struct radv_pipeline *pipeline,
- enum radv_pipeline_type type)
+radv_pipeline_init(struct radv_device *device, struct radv_pipeline *pipeline, enum radv_pipeline_type type)
{
vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
}
VKAPI_ATTR void VKAPI_CALL
-radv_DestroyPipeline(VkDevice _device, VkPipeline _pipeline,
- const VkAllocationCallbacks *pAllocator)
+radv_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, const VkAllocationCallbacks *pAllocator)
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
if (pipeline->shaders[i] && (pipeline->shaders[i]->config.scratch_bytes_per_wave || is_rt)) {
unsigned max_stage_waves = device->scratch_waves;
- scratch_bytes_per_wave =
- MAX2(scratch_bytes_per_wave, pipeline->shaders[i]->config.scratch_bytes_per_wave);
+ scratch_bytes_per_wave = MAX2(scratch_bytes_per_wave, pipeline->shaders[i]->config.scratch_bytes_per_wave);
- max_stage_waves =
- MIN2(max_stage_waves, 4 * device->physical_device->rad_info.num_cu *
- radv_get_max_waves(device, pipeline->shaders[i], i));
+ max_stage_waves = MIN2(max_stage_waves, 4 * device->physical_device->rad_info.num_cu *
+ radv_get_max_waves(device, pipeline->shaders[i], i));
max_waves = MAX2(max_waves, max_stage_waves);
}
}
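/* Worked example for the clamp above, with made-up numbers: num_cu = 40 and
 * radv_get_max_waves() = 8 waves per CU for this shader give a stage cap of
 * 4 * 40 * 8 = 1280 waves; MIN2 against device->scratch_waves then bounds the scratch
 * allocation, and MAX2 across stages keeps the worst case. */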
if (flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
key.optimisations_disabled = 1;
- key.disable_aniso_single_level = device->instance->disable_aniso_single_level &&
- device->physical_device->rad_info.gfx_level < GFX8;
+ key.disable_aniso_single_level =
+ device->instance->disable_aniso_single_level && device->physical_device->rad_info.gfx_level < GFX8;
- key.image_2d_view_of_3d =
- device->image_2d_view_of_3d && device->physical_device->rad_info.gfx_level == GFX9;
+ key.image_2d_view_of_3d = device->image_2d_view_of_3d && device->physical_device->rad_info.gfx_level == GFX9;
key.tex_non_uniform = device->instance->tex_non_uniform;
}
void
-radv_pipeline_stage_init(const VkPipelineShaderStageCreateInfo *sinfo,
- struct radv_pipeline_stage *out_stage, gl_shader_stage stage)
+radv_pipeline_stage_init(const VkPipelineShaderStageCreateInfo *sinfo, struct radv_pipeline_stage *out_stage,
+ gl_shader_stage stage)
{
- const VkShaderModuleCreateInfo *minfo =
- vk_find_struct_const(sinfo->pNext, SHADER_MODULE_CREATE_INFO);
+ const VkShaderModuleCreateInfo *minfo = vk_find_struct_const(sinfo->pNext, SHADER_MODULE_CREATE_INFO);
const VkPipelineShaderStageModuleIdentifierCreateInfoEXT *iinfo =
vk_find_struct_const(sinfo->pNext, PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT);
const struct radv_pipeline_layout *layout = data;
const struct radv_descriptor_set_layout *set_layout = layout->set[set].layout;
- const struct vk_ycbcr_conversion_state *ycbcr_samplers =
- radv_immutable_ycbcr_samplers(set_layout, binding);
+ const struct vk_ycbcr_conversion_state *ycbcr_samplers = radv_immutable_ycbcr_samplers(set_layout, binding);
if (!ycbcr_samplers)
return NULL;
}
bool
-radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size,
- unsigned num_components, nir_intrinsic_instr *low,
- nir_intrinsic_instr *high, void *data)
+radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size, unsigned num_components,
+ nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data)
{
if (num_components > 4)
return false;
FALLTHROUGH;
case nir_intrinsic_load_shared:
case nir_intrinsic_store_shared:
- if (bit_size * num_components ==
- 96) { /* 96 bit loads require 128 bit alignment and are split otherwise */
+ if (bit_size * num_components == 96) { /* 96-bit loads require 128-bit alignment and are split otherwise */
return align % 16 == 0;
} else if (bit_size == 16 && (align % 4)) {
/* AMD hardware can't do 2-byte aligned f16vec2 loads, but they are useful for ALU
case nir_op_isign:
case nir_op_uadd_sat:
case nir_op_usub_sat:
- return (bit_size == 8 || !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32
- : 0;
+ return (bit_size == 8 || !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32 : 0;
case nir_op_iadd_sat:
case nir_op_isub_sat:
return bit_size == 8 || !nir_dest_is_divergent(alu->dest.dest) ? 32 : 0;
case nir_op_ine:
case nir_op_ult:
case nir_op_uge:
- return (bit_size == 8 || !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32
- : 0;
+ return (bit_size == 8 || !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32 : 0;
default:
return 0;
}
}
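/* Standalone sketch of the alignment rule above: with NIR-style alignment info
 * (addresses of the form align_mul * k + align_offset, align_mul a power of two),
 * a 96-bit access is only kept whole at 16-byte alignment. */
static bool
vec96_access_is_aligned(unsigned align_mul, unsigned align_offset)
{
   /* Largest power of two dividing every possible address. */
   unsigned align = 1u << __builtin_ctz(align_mul | align_offset);
   return align % 16 == 0;
}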
enum nir_lower_non_uniform_access_type lower_non_uniform_access_types =
- nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_ssbo_access |
- nir_lower_non_uniform_texture_access | nir_lower_non_uniform_image_access;
+ nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_ssbo_access | nir_lower_non_uniform_texture_access |
+ nir_lower_non_uniform_image_access;
/* In practice, most shaders do not have non-uniform-qualified
* accesses (see
NIR_PASS(_, stage->nir, nir_lower_memory_model);
nir_load_store_vectorize_options vectorize_opts = {
- .modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_push_const | nir_var_mem_shared |
- nir_var_mem_global,
+ .modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_push_const | nir_var_mem_shared | nir_var_mem_global,
.callback = radv_mem_vectorize_callback,
.robust_modes = 0,
/* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if
NIR_PASS(progress, stage->nir, nir_opt_load_store_vectorize, &vectorize_opts);
if (progress) {
NIR_PASS(_, stage->nir, nir_copy_prop);
- NIR_PASS(_, stage->nir, nir_opt_shrink_stores,
- !device->instance->disable_shrink_image_store);
+ NIR_PASS(_, stage->nir, nir_opt_shrink_stores, !device->instance->disable_shrink_image_store);
/* Gather info again, to update whether 8/16-bit are used. */
nir_shader_gather_info(stage->nir, nir_shader_get_entrypoint(stage->nir));
if (stage->nir->info.uses_resource_info_query)
NIR_PASS(_, stage->nir, ac_nir_lower_resinfo, gfx_level);
- NIR_PASS_V(stage->nir, radv_nir_apply_pipeline_layout, device, pipeline_layout, &stage->info,
- &stage->args);
+ NIR_PASS_V(stage->nir, radv_nir_apply_pipeline_layout, device, pipeline_layout, &stage->info, &stage->args);
if (!pipeline_key->optimisations_disabled) {
NIR_PASS(_, stage->nir, nir_opt_shrink_vectors);
sink_opts |= nir_move_load_input;
NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);
- NIR_PASS(_, stage->nir, nir_opt_move,
- nir_move_load_input | nir_move_const_undef | nir_move_copies);
+ NIR_PASS(_, stage->nir, nir_opt_move, nir_move_load_input | nir_move_const_undef | nir_move_copies);
}
/* Lower VS inputs. We need to do this after nir_opt_sink, because
* load_input can be reordered, but buffer loads can't.
*/
if (stage->stage == MESA_SHADER_VERTEX) {
- NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, stage, pipeline_key,
- &device->physical_device->rad_info);
+ NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, stage, pipeline_key, &device->physical_device->rad_info);
}
/* Lower I/O intrinsics to memory instructions. */
NIR_PASS_V(stage->nir, ac_nir_lower_legacy_vs, gfx_level,
stage->info.outinfo.clip_dist_mask | stage->info.outinfo.cull_dist_mask,
stage->info.outinfo.vs_output_param_offset, stage->info.outinfo.param_exports,
- stage->info.outinfo.export_prim_id, false, false,
- stage->info.force_vrs_per_vertex);
+ stage->info.outinfo.export_prim_id, false, false, stage->info.force_vrs_per_vertex);
} else {
ac_nir_gs_output_info gs_out_info = {
* present.
*/
.alpha_to_coverage_via_mrtz = stage->info.ps.writes_mrt0_alpha,
- .dual_src_blend_swizzle =
- pipeline_key->ps.epilog.mrt0_is_dual_src && gfx_level >= GFX11,
+ .dual_src_blend_swizzle = pipeline_key->ps.epilog.mrt0_is_dual_src && gfx_level >= GFX11,
/* Need to filter out unwritten color slots. */
- .spi_shader_col_format =
- pipeline_key->ps.epilog.spi_shader_col_format & stage->info.ps.colors_written,
+ .spi_shader_col_format = pipeline_key->ps.epilog.spi_shader_col_format & stage->info.ps.colors_written,
.color_is_int8 = pipeline_key->ps.epilog.color_is_int8,
.color_is_int10 = pipeline_key->ps.epilog.color_is_int10,
.alpha_func = COMPARE_FUNC_ALWAYS,
NIR_PASS(_, stage->nir, ac_nir_lower_global_access);
NIR_PASS_V(stage->nir, radv_nir_lower_abi, gfx_level, &stage->info, &stage->args, pipeline_key,
device->physical_device->rad_info.address32_hi);
- radv_optimize_nir_algebraic(stage->nir, io_to_mem || lowered_ngg ||
- stage->stage == MESA_SHADER_COMPUTE ||
- stage->stage == MESA_SHADER_TASK);
+ radv_optimize_nir_algebraic(
+ stage->nir, io_to_mem || lowered_ngg || stage->stage == MESA_SHADER_COMPUTE || stage->stage == MESA_SHADER_TASK);
if (stage->nir->info.bit_sizes_int & (8 | 16)) {
if (gfx_level >= GFX8) {
if (gfx_level >= GFX8)
NIR_PASS(_, stage->nir, nir_opt_remove_phis); /* cleanup LCSSA phis */
}
- if (((stage->nir->info.bit_sizes_int | stage->nir->info.bit_sizes_float) & 16) &&
- gfx_level >= GFX9) {
+ if (((stage->nir->info.bit_sizes_int | stage->nir->info.bit_sizes_float) & 16) && gfx_level >= GFX9) {
bool separate_g16 = gfx_level >= GFX10;
struct nir_fold_tex_srcs_options fold_srcs_options[] = {
{
- .sampler_dims =
- ~(BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) | BITFIELD_BIT(GLSL_SAMPLER_DIM_BUF)),
- .src_types = (1 << nir_tex_src_coord) | (1 << nir_tex_src_lod) |
- (1 << nir_tex_src_bias) | (1 << nir_tex_src_min_lod) |
- (1 << nir_tex_src_ms_index) |
+ .sampler_dims = ~(BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) | BITFIELD_BIT(GLSL_SAMPLER_DIM_BUF)),
+ .src_types = (1 << nir_tex_src_coord) | (1 << nir_tex_src_lod) | (1 << nir_tex_src_bias) |
+ (1 << nir_tex_src_min_lod) | (1 << nir_tex_src_ms_index) |
(separate_g16 ? 0 : (1 << nir_tex_src_ddx) | (1 << nir_tex_src_ddy)),
},
{
sink_opts |= nir_move_comparisons | nir_move_load_ubo | nir_move_load_ssbo;
NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);
- nir_move_options move_opts = nir_move_const_undef | nir_move_load_ubo | nir_move_load_input |
- nir_move_comparisons | nir_move_copies;
+ nir_move_options move_opts =
+ nir_move_const_undef | nir_move_load_ubo | nir_move_load_input | nir_move_comparisons | nir_move_copies;
NIR_PASS(_, stage->nir, nir_opt_move, move_opts);
}
}
if (!pipeline->shaders[i])
continue;
- if (i == MESA_SHADER_GEOMETRY &&
- !radv_pipeline_has_ngg(radv_pipeline_to_graphics(pipeline))) {
+ if (i == MESA_SHADER_GEOMETRY && !radv_pipeline_has_ngg(radv_pipeline_to_graphics(pipeline))) {
ret += 2u;
} else {
ret += 1u;
}
static struct radv_shader *
-radv_get_shader_from_executable_index(struct radv_pipeline *pipeline, int index,
- gl_shader_stage *stage)
+radv_get_shader_from_executable_index(struct radv_pipeline *pipeline, int index, gl_shader_stage *stage)
{
if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
*stage = MESA_SHADER_INTERSECTION;
--index;
- if (i == MESA_SHADER_GEOMETRY &&
- !radv_pipeline_has_ngg(radv_pipeline_to_graphics(pipeline))) {
+ if (i == MESA_SHADER_GEOMETRY && !radv_pipeline_has_ngg(radv_pipeline_to_graphics(pipeline))) {
if (!index) {
*stage = i;
return pipeline->gs_copy_shader;
VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPipelineExecutablePropertiesKHR(VkDevice _device, const VkPipelineInfoKHR *pPipelineInfo,
- uint32_t *pExecutableCount,
- VkPipelineExecutablePropertiesKHR *pProperties)
+ uint32_t *pExecutableCount, VkPipelineExecutablePropertiesKHR *pProperties)
{
RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelineInfo->pipeline);
const uint32_t total_count = radv_get_executable_count(pipeline);
desc_copy(pProperties[executable_idx].description, description);
++executable_idx;
- if (i == MESA_SHADER_GEOMETRY &&
- !radv_pipeline_has_ngg(radv_pipeline_to_graphics(pipeline))) {
+ if (i == MESA_SHADER_GEOMETRY && !radv_pipeline_has_ngg(radv_pipeline_to_graphics(pipeline))) {
assert(pipeline->gs_copy_shader);
if (executable_idx >= count)
break;
}
VKAPI_ATTR VkResult VKAPI_CALL
-radv_GetPipelineExecutableStatisticsKHR(VkDevice _device,
- const VkPipelineExecutableInfoKHR *pExecutableInfo,
- uint32_t *pStatisticCount,
- VkPipelineExecutableStatisticKHR *pStatistics)
+radv_GetPipelineExecutableStatisticsKHR(VkDevice _device, const VkPipelineExecutableInfoKHR *pExecutableInfo,
+ uint32_t *pStatisticCount, VkPipelineExecutableStatisticKHR *pStatistics)
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
const struct radv_physical_device *pdevice = device->physical_device;
unsigned lds_increment = pdevice->rad_info.gfx_level >= GFX11 && stage == MESA_SHADER_FRAGMENT
- ? 1024 : pdevice->rad_info.lds_encode_granularity;
+ ? 1024
+ : pdevice->rad_info.lds_encode_granularity;
unsigned max_waves = radv_get_max_waves(device, shader, stage);
VkPipelineExecutableStatisticKHR *s = pStatistics;
VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPipelineExecutableInternalRepresentationsKHR(
- VkDevice _device, const VkPipelineExecutableInfoKHR *pExecutableInfo,
- uint32_t *pInternalRepresentationCount,
+ VkDevice _device, const VkPipelineExecutableInfoKHR *pExecutableInfo, uint32_t *pInternalRepresentationCount,
VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations)
{
RADV_FROM_HANDLE(radv_device, device, _device);
for (uint32_t i = 0; i < stageCount; i++) {
RADV_FROM_HANDLE(vk_shader_module, module, new_stages[i].module);
- const VkShaderModuleCreateInfo *minfo =
- vk_find_struct_const(pStages[i].pNext, SHADER_MODULE_CREATE_INFO);
+ const VkShaderModuleCreateInfo *minfo = vk_find_struct_const(pStages[i].pNext, SHADER_MODULE_CREATE_INFO);
if (module) {
- struct vk_shader_module *new_module =
- ralloc_size(mem_ctx, sizeof(struct vk_shader_module) + module->size);
+ struct vk_shader_module *new_module = ralloc_size(mem_ctx, sizeof(struct vk_shader_module) + module->size);
if (!new_module)
return NULL;
}
void
-radv_hash_shaders(unsigned char *hash, const struct radv_pipeline_stage *stages,
- uint32_t stage_count, const struct radv_pipeline_layout *layout,
- const struct radv_pipeline_key *key, uint32_t flags)
+radv_hash_shaders(unsigned char *hash, const struct radv_pipeline_stage *stages, uint32_t stage_count,
+ const struct radv_pipeline_layout *layout, const struct radv_pipeline_key *key, uint32_t flags)
{
struct mesa_sha1 ctx;
}
void
-radv_hash_rt_stages(struct mesa_sha1 *ctx, const VkPipelineShaderStageCreateInfo *stages,
- unsigned stage_count)
+radv_hash_rt_stages(struct mesa_sha1 *ctx, const VkPipelineShaderStageCreateInfo *stages, unsigned stage_count)
{
for (unsigned i = 0; i < stage_count; ++i) {
unsigned char hash[20];
void
radv_hash_rt_shaders(unsigned char *hash, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
- const struct radv_pipeline_key *key,
- const struct radv_ray_tracing_group *groups, uint32_t flags)
+ const struct radv_pipeline_key *key, const struct radv_ray_tracing_group *groups, uint32_t flags)
{
RADV_FROM_HANDLE(radv_pipeline_layout, layout, pCreateInfo->layout);
struct mesa_sha1 ctx;
radv_hash_rt_stages(&ctx, pCreateInfo->pStages, pCreateInfo->stageCount);
for (uint32_t i = 0; i < pCreateInfo->groupCount; i++) {
- _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].type,
- sizeof(pCreateInfo->pGroups[i].type));
- _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].generalShader,
- sizeof(pCreateInfo->pGroups[i].generalShader));
- _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].anyHitShader,
- sizeof(pCreateInfo->pGroups[i].anyHitShader));
+ _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].type, sizeof(pCreateInfo->pGroups[i].type));
+ _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].generalShader, sizeof(pCreateInfo->pGroups[i].generalShader));
+ _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].anyHitShader, sizeof(pCreateInfo->pGroups[i].anyHitShader));
_mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].closestHitShader,
sizeof(pCreateInfo->pGroups[i].closestHitShader));
_mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].intersectionShader,
}
const uint32_t pipeline_flags =
- pCreateInfo->flags & (VK_PIPELINE_CREATE_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR |
- VK_PIPELINE_CREATE_RAY_TRACING_SKIP_AABBS_BIT_KHR |
- VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR |
- VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR |
- VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR |
- VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_INTERSECTION_SHADERS_BIT_KHR |
- VK_PIPELINE_CREATE_LIBRARY_BIT_KHR);
+ pCreateInfo->flags &
+ (VK_PIPELINE_CREATE_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR | VK_PIPELINE_CREATE_RAY_TRACING_SKIP_AABBS_BIT_KHR |
+ VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR |
+ VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR |
+ VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR |
+ VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_INTERSECTION_SHADERS_BIT_KHR | VK_PIPELINE_CREATE_LIBRARY_BIT_KHR);
_mesa_sha1_update(&ctx, &pipeline_flags, 4);
_mesa_sha1_update(&ctx, &flags, 4);
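/* Reduced sketch of the hashing pattern above: every input that can change the
 * compiled code is folded into one SHA-1, which then keys the pipeline cache.
 * Field selection here is illustrative only. */
#include <stddef.h>
#include "util/mesa-sha1.h"

static void
example_hash_key(unsigned char hash[20], const void *spirv, size_t spirv_size, uint32_t create_flags)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);
   _mesa_sha1_update(&ctx, spirv, spirv_size);
   _mesa_sha1_update(&ctx, &create_flags, sizeof(create_flags));
   _mesa_sha1_final(&ctx, hash);
}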
struct blob_reader *blob)
{
struct radv_device *device = container_of(cache->base.device, struct radv_device, vk);
- const struct radv_shader_binary *binary =
- blob_read_bytes(blob, sizeof(struct radv_shader_binary));
+ const struct radv_shader_binary *binary = blob_read_bytes(blob, sizeof(struct radv_shader_binary));
assert(key_size == SHA1_DIGEST_LENGTH);
struct radv_shader *shader = radv_shader_create(device, binary);
/* TODO: Skip disk-cache for meta-shaders because they are stored in a different cache file */
struct vk_pipeline_cache_object *shader_obj;
- shader_obj = vk_pipeline_cache_create_and_insert_object(cache, hash, SHA1_DIGEST_LENGTH, binary,
- binary->total_size, &radv_shader_ops);
+ shader_obj = vk_pipeline_cache_create_and_insert_object(cache, hash, SHA1_DIGEST_LENGTH, binary, binary->total_size,
+ &radv_shader_ops);
return shader_obj ? container_of(shader_obj, struct radv_shader, base) : NULL;
}
unsigned num_stack_sizes, unsigned ps_epilog_binary_size)
{
assert(num_stack_sizes == 0 || ps_epilog_binary_size == 0);
- const size_t size = sizeof(struct radv_pipeline_cache_object) +
- (num_shaders * sizeof(struct radv_shader *)) + ps_epilog_binary_size +
- (num_stack_sizes * sizeof(uint32_t));
+ const size_t size = sizeof(struct radv_pipeline_cache_object) + (num_shaders * sizeof(struct radv_shader *)) +
+ ps_epilog_binary_size + (num_stack_sizes * sizeof(uint32_t));
- struct radv_pipeline_cache_object *object =
- vk_alloc(&device->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
+ struct radv_pipeline_cache_object *object = vk_alloc(&device->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
if (!object)
return NULL;
- vk_pipeline_cache_object_init(device, &object->base, &radv_pipeline_ops, object->sha1,
- SHA1_DIGEST_LENGTH);
+ vk_pipeline_cache_object_init(device, &object->base, &radv_pipeline_ops, object->sha1, SHA1_DIGEST_LENGTH);
object->num_shaders = num_shaders;
object->num_stack_sizes = num_stack_sizes;
object->ps_epilog_binary_size = ps_epilog_binary_size;
}
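/* Sketch of the single-allocation layout used above: one vk_alloc holds the header,
 * the shader pointer array, and the raw trailing bytes (stack sizes or the PS epilog
 * binary). A generic illustration with a hypothetical type: */
#include <stdint.h>
#include <stdlib.h>

struct packed_object {
   uint32_t num_items;
   void *items[]; /* flexible array member; trailing data follows items[num_items] */
};

static struct packed_object *
packed_object_create(uint32_t n, size_t extra_bytes)
{
   const size_t size = sizeof(struct packed_object) + n * sizeof(void *) + extra_bytes;
   struct packed_object *obj = calloc(1, size);
   if (obj)
      obj->num_items = n;
   return obj; /* trailing bytes start at (char *)&obj->items[n] */
}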
static void
-radv_pipeline_cache_object_destroy(struct vk_device *_device,
- struct vk_pipeline_cache_object *object)
+radv_pipeline_cache_object_destroy(struct vk_device *_device, struct vk_pipeline_cache_object *object)
{
struct radv_device *device = container_of(_device, struct radv_device, vk);
- struct radv_pipeline_cache_object *pipeline_obj =
- container_of(object, struct radv_pipeline_cache_object, base);
+ struct radv_pipeline_cache_object *pipeline_obj = container_of(object, struct radv_pipeline_cache_object, base);
for (unsigned i = 0; i < pipeline_obj->num_shaders; i++) {
if (pipeline_obj->shaders[i])
}
static struct vk_pipeline_cache_object *
-radv_pipeline_cache_object_deserialize(struct vk_pipeline_cache *cache, const void *key_data,
- size_t key_size, struct blob_reader *blob)
+radv_pipeline_cache_object_deserialize(struct vk_pipeline_cache *cache, const void *key_data, size_t key_size,
+ struct blob_reader *blob)
{
struct radv_device *device = container_of(cache->base.device, struct radv_device, vk);
assert(key_size == SHA1_DIGEST_LENGTH);
unsigned ps_epilog_binary_size = blob_read_uint32(blob);
struct radv_pipeline_cache_object *object;
- object = radv_pipeline_cache_object_create(&device->vk, num_shaders, key_data, num_stack_sizes,
- ps_epilog_binary_size);
+ object =
+ radv_pipeline_cache_object_create(&device->vk, num_shaders, key_data, num_stack_sizes, ps_epilog_binary_size);
if (!object)
return NULL;
if (ps_epilog_binary_size) {
assert(num_stack_sizes == 0);
struct radv_shader_part_binary *binary = object->data;
- object->ps_epilog =
- radv_shader_part_create(device, binary, device->physical_device->ps_wave_size);
+ object->ps_epilog = radv_shader_part_create(device, binary, device->physical_device->ps_wave_size);
if (!object->ps_epilog) {
vk_pipeline_cache_object_unref(&device->vk, &object->base);
static bool
radv_pipeline_cache_object_serialize(struct vk_pipeline_cache_object *object, struct blob *blob)
{
- struct radv_pipeline_cache_object *pipeline_obj =
- container_of(object, struct radv_pipeline_cache_object, base);
+ struct radv_pipeline_cache_object *pipeline_obj = container_of(object, struct radv_pipeline_cache_object, base);
blob_write_uint32(blob, pipeline_obj->num_shaders);
blob_write_uint32(blob, pipeline_obj->num_stack_sizes);
for (unsigned i = 0; i < pipeline_obj->num_shaders; i++)
blob_write_bytes(blob, pipeline_obj->shaders[i]->sha1, SHA1_DIGEST_LENGTH);
- const size_t data_size =
- pipeline_obj->ps_epilog_binary_size + (pipeline_obj->num_stack_sizes * sizeof(uint32_t));
+ const size_t data_size = pipeline_obj->ps_epilog_binary_size + (pipeline_obj->num_stack_sizes * sizeof(uint32_t));
blob_write_bytes(blob, pipeline_obj->data, data_size);
return true;
};
bool
-radv_pipeline_cache_search(struct radv_device *device, struct vk_pipeline_cache *cache,
- struct radv_pipeline *pipeline, const unsigned char *sha1,
- bool *found_in_application_cache)
+radv_pipeline_cache_search(struct radv_device *device, struct vk_pipeline_cache *cache, struct radv_pipeline *pipeline,
+ const unsigned char *sha1, bool *found_in_application_cache)
{
*found_in_application_cache = false;
if (!object)
return false;
- struct radv_pipeline_cache_object *pipeline_obj =
- container_of(object, struct radv_pipeline_cache_object, base);
+ struct radv_pipeline_cache_object *pipeline_obj = container_of(object, struct radv_pipeline_cache_object, base);
for (unsigned i = 0; i < pipeline_obj->num_shaders; i++) {
gl_shader_stage s = pipeline_obj->shaders[i]->info.stage;
}
void
-radv_pipeline_cache_insert(struct radv_device *device, struct vk_pipeline_cache *cache,
- struct radv_pipeline *pipeline,
- struct radv_shader_part_binary *ps_epilog_binary,
- const unsigned char *sha1)
+radv_pipeline_cache_insert(struct radv_device *device, struct vk_pipeline_cache *cache, struct radv_pipeline *pipeline,
+ struct radv_shader_part_binary *ps_epilog_binary, const unsigned char *sha1)
{
if (radv_is_cache_disabled(device))
return;
unsigned ps_epilog_binary_size = ps_epilog_binary ? ps_epilog_binary->total_size : 0;
struct radv_pipeline_cache_object *pipeline_obj;
- pipeline_obj =
- radv_pipeline_cache_object_create(&device->vk, num_shaders, sha1, 0, ps_epilog_binary_size);
+ pipeline_obj = radv_pipeline_cache_object_create(&device->vk, num_shaders, sha1, 0, ps_epilog_binary_size);
if (!pipeline_obj)
return;
}
/* Add the object to the cache */
- struct vk_pipeline_cache_object *object =
- vk_pipeline_cache_add_object(cache, &pipeline_obj->base);
+ struct vk_pipeline_cache_object *object = vk_pipeline_cache_add_object(cache, &pipeline_obj->base);
vk_pipeline_cache_object_unref(&device->vk, object);
}
cache = device->mem_cache;
bool cache_hit = false;
- struct vk_pipeline_cache_object *object = vk_pipeline_cache_lookup_object(
- cache, pipeline->sha1, SHA1_DIGEST_LENGTH, &radv_pipeline_ops, &cache_hit);
+ struct vk_pipeline_cache_object *object =
+ vk_pipeline_cache_lookup_object(cache, pipeline->sha1, SHA1_DIGEST_LENGTH, &radv_pipeline_ops, &cache_hit);
if (!object)
return false;
- struct radv_pipeline_cache_object *pipeline_obj =
- container_of(object, struct radv_pipeline_cache_object, base);
+ struct radv_pipeline_cache_object *pipeline_obj = container_of(object, struct radv_pipeline_cache_object, base);
bool is_library = pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR;
bool complete = true;
unsigned idx = 0;
if (!is_library)
- pipeline->base.base.shaders[MESA_SHADER_INTERSECTION] =
- radv_shader_ref(pipeline_obj->shaders[idx++]);
+ pipeline->base.base.shaders[MESA_SHADER_INTERSECTION] = radv_shader_ref(pipeline_obj->shaders[idx++]);
for (unsigned i = 0; i < pCreateInfo->stageCount; i++) {
if (radv_ray_tracing_stage_is_compiled(&pipeline->stages[i])) {
pipeline->stages[i].shader = &radv_shader_ref(pipeline_obj->shaders[idx++])->base;
} else if (is_library) {
- pipeline->stages[i].shader =
- radv_pipeline_cache_search_nir(device, cache, pipeline->stages[i].sha1);
+ pipeline->stages[i].shader = radv_pipeline_cache_search_nir(device, cache, pipeline->stages[i].sha1);
complete &= pipeline->stages[i].shader != NULL;
}
}
void
radv_ray_tracing_pipeline_cache_insert(struct radv_device *device, struct vk_pipeline_cache *cache,
- struct radv_ray_tracing_pipeline *pipeline,
- unsigned num_stages, const unsigned char *sha1)
+ struct radv_ray_tracing_pipeline *pipeline, unsigned num_stages,
+ const unsigned char *sha1)
{
if (radv_is_cache_disabled(device))
return;
unsigned idx = 0;
if (pipeline->base.base.shaders[MESA_SHADER_INTERSECTION])
- pipeline_obj->shaders[idx++] =
- radv_shader_ref(pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]);
+ pipeline_obj->shaders[idx++] = radv_shader_ref(pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]);
for (unsigned i = 0; i < num_stages; ++i) {
if (radv_ray_tracing_stage_is_compiled(&pipeline->stages[i]))
stack_sizes[i] = pipeline->stages[i].stack_size;
/* Add the object to the cache */
- struct vk_pipeline_cache_object *object =
- vk_pipeline_cache_add_object(cache, &pipeline_obj->base);
+ struct vk_pipeline_cache_object *object = vk_pipeline_cache_add_object(cache, &pipeline_obj->base);
vk_pipeline_cache_object_unref(&device->vk, object);
}
struct vk_pipeline_cache_object *
-radv_pipeline_cache_search_nir(struct radv_device *device, struct vk_pipeline_cache *cache,
- const uint8_t *sha1)
+radv_pipeline_cache_search_nir(struct radv_device *device, struct vk_pipeline_cache *cache, const uint8_t *sha1)
{
if (radv_is_cache_disabled(device))
return NULL;
if (!cache)
cache = device->mem_cache;
- return vk_pipeline_cache_lookup_object(cache, sha1, SHA1_DIGEST_LENGTH,
- &vk_raw_data_cache_object_ops, NULL);
+ return vk_pipeline_cache_lookup_object(cache, sha1, SHA1_DIGEST_LENGTH, &vk_raw_data_cache_object_ops, NULL);
}
struct nir_shader *
-radv_pipeline_cache_handle_to_nir(struct radv_device *device,
- struct vk_pipeline_cache_object *object)
+radv_pipeline_cache_handle_to_nir(struct radv_device *device, struct vk_pipeline_cache_object *object)
{
struct blob_reader blob;
- struct vk_raw_data_cache_object *nir_object =
- container_of(object, struct vk_raw_data_cache_object, base);
+ struct vk_raw_data_cache_object *nir_object = container_of(object, struct vk_raw_data_cache_object, base);
blob_reader_init(&blob, nir_object->data, nir_object->data_size);
nir_shader *nir = nir_deserialize(NULL, NULL, &blob);
}
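/* Serialize-side counterpart (for reference) of the nir_deserialize call above,
 * using the same Mesa blob API; header paths are as used elsewhere in the tree. */
#include "util/blob.h"
#include "nir_serialize.h"

static void
example_serialize_nir(const struct nir_shader *nir, struct blob *out)
{
   blob_init(out);
   /* strip = true drops names/debug info so the cached bytes stay stable. */
   nir_serialize(out, nir, true);
}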
struct vk_pipeline_cache_object *
-radv_pipeline_cache_nir_to_handle(struct radv_device *device, struct vk_pipeline_cache *cache,
- struct nir_shader *nir, const uint8_t *sha1, bool cached)
+radv_pipeline_cache_nir_to_handle(struct radv_device *device, struct vk_pipeline_cache *cache, struct nir_shader *nir,
+ const uint8_t *sha1, bool cached)
{
if (!cache)
cache = device->mem_cache;
struct vk_pipeline_cache_object *object;
if (cached && !radv_is_cache_disabled(device)) {
- object = vk_pipeline_cache_create_and_insert_object(cache, sha1, SHA1_DIGEST_LENGTH, data,
- size, &vk_raw_data_cache_object_ops);
+ object = vk_pipeline_cache_create_and_insert_object(cache, sha1, SHA1_DIGEST_LENGTH, data, size,
+ &vk_raw_data_cache_object_ops);
} else {
struct vk_raw_data_cache_object *nir_object =
vk_raw_data_cache_object_create(&device->vk, sha1, SHA1_DIGEST_LENGTH, data, size);
}
void
-radv_pipeline_emit_compute_state(const struct radv_physical_device *pdevice,
- struct radeon_cmdbuf *cs, const struct radv_shader *shader)
+radv_pipeline_emit_compute_state(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs,
+ const struct radv_shader *shader)
{
unsigned threads_per_threadgroup;
unsigned threadgroups_per_cu = 1;
if (pdevice->rad_info.gfx_level >= GFX10 && waves_per_threadgroup == 1)
threadgroups_per_cu = 2;
- radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
- ac_get_compute_resource_limits(&pdevice->rad_info, waves_per_threadgroup,
- max_waves_per_sh, threadgroups_per_cu));
+ radeon_set_sh_reg(
+ cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
+ ac_get_compute_resource_limits(&pdevice->rad_info, waves_per_threadgroup, max_waves_per_sh, threadgroups_per_cu));
radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[0]));
}
static struct radv_pipeline_key
-radv_generate_compute_pipeline_key(const struct radv_device *device,
- struct radv_compute_pipeline *pipeline,
+radv_generate_compute_pipeline_key(const struct radv_device *device, struct radv_compute_pipeline *pipeline,
const VkComputePipelineCreateInfo *pCreateInfo)
{
const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->stage;
- struct radv_pipeline_key key =
- radv_generate_pipeline_key(device, &pipeline->base, pCreateInfo->flags);
+ struct radv_pipeline_key key = radv_generate_pipeline_key(device, &pipeline->base, pCreateInfo->flags);
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *subgroup_size =
vk_find_struct_const(stage->pNext, PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);
if (subgroup_size) {
- assert(subgroup_size->requiredSubgroupSize == 32 ||
- subgroup_size->requiredSubgroupSize == 64);
+ assert(subgroup_size->requiredSubgroupSize == 32 || subgroup_size->requiredSubgroupSize == 64);
key.cs.compute_subgroup_size = subgroup_size->requiredSubgroupSize;
} else if (stage->flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT) {
key.cs.require_full_subgroups = true;
}
static VkResult
-radv_compute_pipeline_compile(struct radv_compute_pipeline *pipeline,
- struct radv_pipeline_layout *pipeline_layout,
+radv_compute_pipeline_compile(struct radv_compute_pipeline *pipeline, struct radv_pipeline_layout *pipeline_layout,
struct radv_device *device, struct vk_pipeline_cache *cache,
const struct radv_pipeline_key *pipeline_key,
- const VkPipelineShaderStageCreateInfo *pStage,
- const VkPipelineCreateFlags flags,
+ const VkPipelineShaderStageCreateInfo *pStage, const VkPipelineCreateFlags flags,
const VkPipelineCreationFeedbackCreateInfo *creation_feedback)
{
struct radv_shader_binary *binaries[MESA_VULKAN_SHADER_STAGES] = {NULL};
pipeline->base.pipeline_hash = *(uint64_t *)hash;
bool found_in_application_cache = true;
- if (!keep_executable_info && radv_pipeline_cache_search(device, cache, &pipeline->base, hash,
- &found_in_application_cache)) {
+ if (!keep_executable_info &&
+ radv_pipeline_cache_search(device, cache, &pipeline->base, hash, &found_in_application_cache)) {
if (found_in_application_cache)
- pipeline_feedback.flags |=
- VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
+ pipeline_feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
result = VK_SUCCESS;
goto done;
}
int64_t stage_start = os_time_get_nano();
/* Compile SPIR-V shader to NIR. */
- cs_stage.nir =
- radv_shader_spirv_to_nir(device, &cs_stage, pipeline_key, pipeline->base.is_internal);
+ cs_stage.nir = radv_shader_spirv_to_nir(device, &cs_stage, pipeline_key, pipeline->base.is_internal);
radv_optimize_nir(cs_stage.nir, pipeline_key->optimisations_disabled);
/* Run the shader info pass. */
radv_nir_shader_info_init(&cs_stage.info);
- radv_nir_shader_info_pass(device, cs_stage.nir, MESA_SHADER_NONE, pipeline_layout, pipeline_key,
- pipeline->base.type, false, &cs_stage.info);
+ radv_nir_shader_info_pass(device, cs_stage.nir, MESA_SHADER_NONE, pipeline_layout, pipeline_key, pipeline->base.type,
+ false, &cs_stage.info);
- radv_declare_shader_args(device, pipeline_key, &cs_stage.info, MESA_SHADER_COMPUTE,
- MESA_SHADER_NONE, RADV_SHADER_TYPE_DEFAULT, &cs_stage.args);
+ radv_declare_shader_args(device, pipeline_key, &cs_stage.info, MESA_SHADER_COMPUTE, MESA_SHADER_NONE,
+ RADV_SHADER_TYPE_DEFAULT, &cs_stage.args);
cs_stage.info.user_sgprs_locs = cs_stage.args.user_sgprs_locs;
cs_stage.info.inline_push_constant_mask = cs_stage.args.ac.inline_push_const_mask;
nir_print_shader(cs_stage.nir, stderr);
/* Compile NIR shader to AMD assembly. */
- pipeline->base.shaders[MESA_SHADER_COMPUTE] = radv_shader_nir_to_asm(
- device, cache, &cs_stage, &cs_stage.nir, 1, pipeline_key, keep_executable_info,
- keep_statistic_info, &binaries[MESA_SHADER_COMPUTE]);
+ pipeline->base.shaders[MESA_SHADER_COMPUTE] =
+ radv_shader_nir_to_asm(device, cache, &cs_stage, &cs_stage.nir, 1, pipeline_key, keep_executable_info,
+ keep_statistic_info, &binaries[MESA_SHADER_COMPUTE]);
cs_stage.feedback.duration += os_time_get_nano() - stage_start;
free(binaries[MESA_SHADER_COMPUTE]);
if (radv_can_dump_shader_stats(device, cs_stage.nir)) {
- radv_dump_shader_stats(device, &pipeline->base, pipeline->base.shaders[MESA_SHADER_COMPUTE],
- MESA_SHADER_COMPUTE, stderr);
+ radv_dump_shader_stats(device, &pipeline->base, pipeline->base.shaders[MESA_SHADER_COMPUTE], MESA_SHADER_COMPUTE,
+ stderr);
}
ralloc_free(cs_stage.nir);
}
VkResult
-radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
- const VkComputePipelineCreateInfo *pCreateInfo,
+radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkComputePipelineCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
{
RADV_FROM_HANDLE(radv_device, device, _device);
struct radv_compute_pipeline *pipeline;
VkResult result;
- pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pipeline == NULL) {
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
struct radv_pipeline_key key = radv_generate_compute_pipeline_key(device, pipeline, pCreateInfo);
- result =
- radv_compute_pipeline_compile(pipeline, pipeline_layout, device, cache, &key,
- &pCreateInfo->stage, pCreateInfo->flags, creation_feedback);
+ result = radv_compute_pipeline_compile(pipeline, pipeline_layout, device, cache, &key, &pCreateInfo->stage,
+ pCreateInfo->flags, creation_feedback);
if (result != VK_SUCCESS) {
radv_pipeline_destroy(device, &pipeline->base, pAllocator);
return result;
radv_compute_pipeline_init(device, pipeline, pipeline_layout);
*pPipeline = radv_pipeline_to_handle(&pipeline->base);
- radv_rmv_log_compute_pipeline_create(device, pCreateInfo->flags, &pipeline->base,
- pipeline->base.is_internal);
+ radv_rmv_log_compute_pipeline_create(device, pCreateInfo->flags, &pipeline->base, pipeline->base.is_internal);
return VK_SUCCESS;
}
static VkResult
radv_create_compute_pipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
- const VkComputePipelineCreateInfo *pCreateInfos,
- const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
+ const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
+ VkPipeline *pPipelines)
{
VkResult result = VK_SUCCESS;
unsigned i = 0;
for (; i < count; i++) {
VkResult r;
- r = radv_compute_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator,
- &pPipelines[i]);
+ r = radv_compute_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, &pPipelines[i]);
if (r != VK_SUCCESS) {
result = r;
pPipelines[i] = VK_NULL_HANDLE;
VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
- const VkComputePipelineCreateInfo *pCreateInfos,
- const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
+ const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
+ VkPipeline *pPipelines)
{
- return radv_create_compute_pipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,
- pPipelines);
+ return radv_create_compute_pipelines(_device, pipelineCache, count, pCreateInfos, pAllocator, pPipelines);
}
}
static bool
-radv_is_vrs_enabled(const struct radv_graphics_pipeline *pipeline,
- const struct vk_graphics_pipeline_state *state)
+radv_is_vrs_enabled(const struct radv_graphics_pipeline *pipeline, const struct vk_graphics_pipeline_state *state)
{
return radv_is_static_vrs_enabled(pipeline, state) ||
(pipeline->dynamic_states & RADV_DYNAMIC_FRAGMENT_SHADING_RATE);
static bool
radv_pipeline_has_ds_attachments(const struct vk_render_pass_state *rp)
{
- return rp->depth_attachment_format != VK_FORMAT_UNDEFINED ||
- rp->stencil_attachment_format != VK_FORMAT_UNDEFINED;
+ return rp->depth_attachment_format != VK_FORMAT_UNDEFINED || rp->stencil_attachment_format != VK_FORMAT_UNDEFINED;
}
static bool
* func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
*/
void
-si_blend_remove_dst(VkBlendOp *func, VkBlendFactor *src_factor, VkBlendFactor *dst_factor,
- VkBlendFactor expected_dst, VkBlendFactor replacement_src)
+si_blend_remove_dst(VkBlendOp *func, VkBlendFactor *src_factor, VkBlendFactor *dst_factor, VkBlendFactor expected_dst,
+ VkBlendFactor replacement_src)
{
if (*src_factor == expected_dst && *dst_factor == VK_BLEND_FACTOR_ZERO) {
*src_factor = VK_BLEND_FACTOR_ZERO;
}
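/* Worked instance of the rewrite documented above: with
 * expected_dst = VK_BLEND_FACTOR_DST_COLOR and replacement_src = VK_BLEND_FACTOR_SRC_COLOR,
 *
 *    result = srcColor * dstColor + dstColor * 0
 *
 * becomes
 *
 *    result = srcColor * 0 + dstColor * srcColor
 *
 * which computes the same value while no longer sourcing DST in the source-factor slot.
 */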
static unsigned
-radv_choose_spi_color_format(const struct radv_device *device, VkFormat vk_format,
- bool blend_enable, bool blend_need_alpha)
+radv_choose_spi_color_format(const struct radv_device *device, VkFormat vk_format, bool blend_enable,
+ bool blend_need_alpha)
{
const struct util_format_description *desc = vk_format_description(vk_format);
bool use_rbplus = device->physical_device->rad_info.rbplus_allowed;
const struct util_format_description *desc = vk_format_description(format);
int channel = vk_format_get_first_non_void_channel(format);
- return channel >= 0 && desc->channel[channel].type == UTIL_FORMAT_TYPE_FLOAT &&
- desc->channel[channel].size == 32;
+ return channel >= 0 && desc->channel[channel].type == UTIL_FORMAT_TYPE_FLOAT && desc->channel[channel].size == 32;
}
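/* E.g. VK_FORMAT_R32_SFLOAT: the first non-void channel is a 32-bit float, so this
 * returns true; VK_FORMAT_R16G16B16A16_SFLOAT has 16-bit channels and returns false.
 */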
static unsigned
-radv_compact_spi_shader_col_format(const struct radv_shader *ps,
- const struct radv_blend_state *blend)
+radv_compact_spi_shader_col_format(const struct radv_shader *ps, const struct radv_blend_state *blend)
{
unsigned spi_shader_col_format = blend->spi_shader_col_format;
unsigned value = 0, num_mrts = 0;
* radv_format_meta_fs_key(radv_fs_key_format_exemplars[i]) == i.
*/
const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS] = {
- VK_FORMAT_R32_SFLOAT,
- VK_FORMAT_R32G32_SFLOAT,
- VK_FORMAT_R8G8B8A8_UNORM,
- VK_FORMAT_R16G16B16A16_UNORM,
- VK_FORMAT_R16G16B16A16_SNORM,
- VK_FORMAT_R16G16B16A16_UINT,
- VK_FORMAT_R16G16B16A16_SINT,
- VK_FORMAT_R32G32B32A32_SFLOAT,
- VK_FORMAT_R8G8B8A8_UINT,
- VK_FORMAT_R8G8B8A8_SINT,
- VK_FORMAT_A2R10G10B10_UINT_PACK32,
- VK_FORMAT_A2R10G10B10_SINT_PACK32,
+ VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM,
+ VK_FORMAT_R16G16B16A16_UNORM, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R16G16B16A16_UINT,
+ VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R8G8B8A8_UINT,
+ VK_FORMAT_R8G8B8A8_SINT, VK_FORMAT_A2R10G10B10_UINT_PACK32, VK_FORMAT_A2R10G10B10_SINT_PACK32,
};
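/* A hypothetical self-check of the invariant documented above (illustration only,
 * using radv_format_meta_fs_key as referenced in the comment):
 *
 *    for (unsigned i = 0; i < NUM_META_FS_KEYS; ++i)
 *       assert(radv_format_meta_fs_key(radv_fs_key_format_exemplars[i]) == i);
 */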
unsigned
radv_pipeline_needs_dynamic_ps_epilog(const struct radv_graphics_pipeline *pipeline)
{
/* These dynamic states require compiling PS epilogs on demand. */
- return !!(pipeline->dynamic_states &
- (RADV_DYNAMIC_COLOR_BLEND_ENABLE | RADV_DYNAMIC_COLOR_WRITE_MASK |
- RADV_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE | RADV_DYNAMIC_COLOR_BLEND_EQUATION));
+ return !!(pipeline->dynamic_states & (RADV_DYNAMIC_COLOR_BLEND_ENABLE | RADV_DYNAMIC_COLOR_WRITE_MASK |
+ RADV_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE | RADV_DYNAMIC_COLOR_BLEND_EQUATION));
}
static struct radv_blend_state
-radv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline,
- const struct vk_graphics_pipeline_state *state)
+radv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline, const struct vk_graphics_pipeline_state *state)
{
const struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
struct radv_blend_state blend = {0};
return !!subpass->fragment_shading_rate_attachment;
}
- return (pCreateInfo->flags &
- VK_PIPELINE_CREATE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR) != 0;
+ return (pCreateInfo->flags & VK_PIPELINE_CREATE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR) != 0;
}
static void
-radv_pipeline_init_multisample_state(const struct radv_device *device,
- struct radv_graphics_pipeline *pipeline,
+radv_pipeline_init_multisample_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const struct vk_graphics_pipeline_state *state,
- unsigned rast_prim)
+ const struct vk_graphics_pipeline_state *state, unsigned rast_prim)
{
struct radv_multisample_state *ms = &pipeline->ms;
}
}
-#define RADV_DYNAMIC_CB_STATES \
- (RADV_DYNAMIC_LOGIC_OP_ENABLE | RADV_DYNAMIC_LOGIC_OP | RADV_DYNAMIC_COLOR_WRITE_ENABLE | \
- RADV_DYNAMIC_COLOR_WRITE_MASK | RADV_DYNAMIC_COLOR_BLEND_ENABLE | \
- RADV_DYNAMIC_COLOR_BLEND_EQUATION | RADV_DYNAMIC_BLEND_CONSTANTS)
+#define RADV_DYNAMIC_CB_STATES \
+ (RADV_DYNAMIC_LOGIC_OP_ENABLE | RADV_DYNAMIC_LOGIC_OP | RADV_DYNAMIC_COLOR_WRITE_ENABLE | \
+ RADV_DYNAMIC_COLOR_WRITE_MASK | RADV_DYNAMIC_COLOR_BLEND_ENABLE | RADV_DYNAMIC_COLOR_BLEND_EQUATION | \
+ RADV_DYNAMIC_BLEND_CONSTANTS)
static bool
-radv_pipeline_is_blend_enabled(const struct radv_graphics_pipeline *pipeline,
- const struct vk_color_blend_state *cb)
+radv_pipeline_is_blend_enabled(const struct radv_graphics_pipeline *pipeline, const struct vk_color_blend_state *cb)
{
if (cb) {
for (uint32_t i = 0; i < cb->attachment_count; i++) {
}
static uint64_t
-radv_pipeline_needed_dynamic_state(const struct radv_device *device,
- const struct radv_graphics_pipeline *pipeline,
+radv_pipeline_needed_dynamic_state(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline,
const struct vk_graphics_pipeline_state *state)
{
bool has_color_att = radv_pipeline_has_color_attachments(state->rp);
- bool raster_enabled = !state->rs->rasterizer_discard_enable ||
- (pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
+ bool raster_enabled =
+ !state->rs->rasterizer_discard_enable || (pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
uint64_t states = RADV_DYNAMIC_ALL;
if (device->physical_device->rad_info.gfx_level < GFX10_3)
return states;
}
- if (!state->rs->depth_bias.enable &&
- !(pipeline->dynamic_states & RADV_DYNAMIC_DEPTH_BIAS_ENABLE))
+ if (!state->rs->depth_bias.enable && !(pipeline->dynamic_states & RADV_DYNAMIC_DEPTH_BIAS_ENABLE))
states &= ~RADV_DYNAMIC_DEPTH_BIAS;
if (!(pipeline->dynamic_states & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) &&
if (!(pipeline->dynamic_states & RADV_DYNAMIC_STENCIL_TEST_ENABLE) &&
(!state->ds || !state->ds->stencil.test_enable))
- states &= ~(RADV_DYNAMIC_STENCIL_COMPARE_MASK | RADV_DYNAMIC_STENCIL_WRITE_MASK |
- RADV_DYNAMIC_STENCIL_REFERENCE | RADV_DYNAMIC_STENCIL_OP);
+ states &= ~(RADV_DYNAMIC_STENCIL_COMPARE_MASK | RADV_DYNAMIC_STENCIL_WRITE_MASK | RADV_DYNAMIC_STENCIL_REFERENCE |
+ RADV_DYNAMIC_STENCIL_OP);
- if (!(pipeline->dynamic_states & RADV_DYNAMIC_DISCARD_RECTANGLE_ENABLE) &&
- !state->dr->rectangle_count)
+ if (!(pipeline->dynamic_states & RADV_DYNAMIC_DISCARD_RECTANGLE_ENABLE) && !state->dr->rectangle_count)
states &= ~RADV_DYNAMIC_DISCARD_RECTANGLE;
if (!(pipeline->dynamic_states & RADV_DYNAMIC_SAMPLE_LOCATIONS_ENABLE) &&
(!state->ms || !state->ms->sample_locations_enable))
states &= ~RADV_DYNAMIC_SAMPLE_LOCATIONS;
- if (!(pipeline->dynamic_states & RADV_DYNAMIC_LINE_STIPPLE_ENABLE) &&
- !state->rs->line.stipple.enable)
+ if (!(pipeline->dynamic_states & RADV_DYNAMIC_LINE_STIPPLE_ENABLE) && !state->rs->line.stipple.enable)
states &= ~RADV_DYNAMIC_LINE_STIPPLE;
if (!has_color_att || !radv_pipeline_is_blend_enabled(pipeline, state->cb))
}
static struct radv_ia_multi_vgt_param_helpers
-radv_compute_ia_multi_vgt_param_helpers(const struct radv_device *device,
- struct radv_graphics_pipeline *pipeline)
+radv_compute_ia_multi_vgt_param_helpers(const struct radv_device *device, struct radv_graphics_pipeline *pipeline)
{
const struct radv_physical_device *pdevice = device->physical_device;
struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0};
* Reproducer: https://bugs.freedesktop.org/show_bug.cgi?id=109242
*/
if (pdevice->rad_info.family == CHIP_TONGA || pdevice->rad_info.family == CHIP_FIJI ||
- pdevice->rad_info.family == CHIP_POLARIS10 ||
- pdevice->rad_info.family == CHIP_POLARIS11 ||
+ pdevice->rad_info.family == CHIP_POLARIS10 || pdevice->rad_info.family == CHIP_POLARIS11 ||
pdevice->rad_info.family == CHIP_POLARIS12 || pdevice->rad_info.family == CHIP_VEGAM) {
ia_multi_vgt_param.partial_vs_wave = true;
}
return 0;
}
-#define ALL_GRAPHICS_LIB_FLAGS \
- (VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT | \
- VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT | \
- VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT | \
+#define ALL_GRAPHICS_LIB_FLAGS \
+ (VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT | \
+ VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT | \
+ VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT | \
VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)
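/* Hedged example of a pipeline library providing a single GPL part, in contrast to
 * ALL_GRAPHICS_LIB_FLAGS above where one pipeline supplies every part:
 *
 *    const VkGraphicsPipelineLibraryCreateInfoEXT lib_info = {
 *       .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT,
 *       .flags = VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT,
 *    };
 */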
static VkGraphicsPipelineLibraryFlagBitsEXT
}
static VkResult
-radv_pipeline_import_graphics_info(struct radv_device *device,
- struct radv_graphics_pipeline *pipeline,
- struct vk_graphics_pipeline_state *state,
- struct radv_pipeline_layout *layout,
+radv_pipeline_import_graphics_info(struct radv_device *device, struct radv_graphics_pipeline *pipeline,
+ struct vk_graphics_pipeline_state *state, struct radv_pipeline_layout *layout,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
VkGraphicsPipelineLibraryFlagBitsEXT lib_flags)
{
if (pCreateInfo->pDynamicState) {
uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
for (uint32_t s = 0; s < count; s++) {
- pipeline->dynamic_states |=
- radv_dynamic_state_mask(pCreateInfo->pDynamicState->pDynamicStates[s]);
+ pipeline->dynamic_states |= radv_dynamic_state_mask(pCreateInfo->pDynamicState->pDynamicStates[s]);
}
}
pipeline->active_stages |= sinfo->stage;
}
- result =
- vk_graphics_pipeline_state_fill(&device->vk, state, pCreateInfo, NULL, NULL, NULL,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT, &pipeline->state_data);
+ result = vk_graphics_pipeline_state_fill(&device->vk, state, pCreateInfo, NULL, NULL, NULL,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT, &pipeline->state_data);
if (result != VK_SUCCESS)
return result;
if (pipeline->active_stages & VK_SHADER_STAGE_MESH_BIT_EXT) {
pipeline->last_vgt_api_stage = MESA_SHADER_MESH;
} else {
- pipeline->last_vgt_api_stage =
- util_last_bit(pipeline->active_stages & BITFIELD_MASK(MESA_SHADER_FRAGMENT)) - 1;
+ pipeline->last_vgt_api_stage = util_last_bit(pipeline->active_stages & BITFIELD_MASK(MESA_SHADER_FRAGMENT)) - 1;
}
if (lib_flags == ALL_GRAPHICS_LIB_FLAGS) {
}
static void
-radv_graphics_pipeline_import_lib(const struct radv_device *device,
- struct radv_graphics_pipeline *pipeline,
- struct vk_graphics_pipeline_state *state,
- struct radv_pipeline_layout *layout,
+radv_graphics_pipeline_import_lib(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
+ struct vk_graphics_pipeline_state *state, struct radv_pipeline_layout *layout,
struct radv_graphics_lib_pipeline *lib, bool link_optimize)
{
bool import_binaries = false;
}
static void
-radv_pipeline_init_input_assembly_state(const struct radv_device *device,
- struct radv_graphics_pipeline *pipeline)
+radv_pipeline_init_input_assembly_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline)
{
pipeline->ia_multi_vgt_param = radv_compute_ia_multi_vgt_param_helpers(device, pipeline);
}
}
}
- return (pCreateInfo->flags &
- VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) != 0;
+ return (pCreateInfo->flags & VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) != 0;
}
static void
-radv_pipeline_init_dynamic_state(const struct radv_device *device,
- struct radv_graphics_pipeline *pipeline,
+radv_pipeline_init_dynamic_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
const struct vk_graphics_pipeline_state *state,
const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
}
if (states & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
- unsigned count = state->ms->sample_locations->per_pixel *
- state->ms->sample_locations->grid_size.width *
+ unsigned count = state->ms->sample_locations->per_pixel * state->ms->sample_locations->grid_size.width *
state->ms->sample_locations->grid_size.height;
dynamic->sample_location.per_pixel = state->ms->sample_locations->per_pixel;
dynamic->sample_location.grid_size = state->ms->sample_locations->grid_size;
dynamic->sample_location.count = count;
- typed_memcpy(&dynamic->sample_location.locations[0], state->ms->sample_locations->locations,
- count);
+ typed_memcpy(&dynamic->sample_location.locations[0], state->ms->sample_locations->locations, count);
}
/* Depth stencil. */
bool uses_ds_feedback_loop = radv_pipeline_uses_ds_feedback_loop(pCreateInfo, state);
dynamic->feedback_loop_aspects =
- uses_ds_feedback_loop ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)
- : VK_IMAGE_ASPECT_NONE;
+ uses_ds_feedback_loop ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) : VK_IMAGE_ASPECT_NONE;
}
pipeline->dynamic_state.mask = states;
}
static uint32_t
-radv_compute_db_shader_control(const struct radv_device *device,
- const struct radv_graphics_pipeline *pipeline,
+radv_compute_db_shader_control(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline,
const struct vk_graphics_pipeline_state *state)
{
const struct radv_physical_device *pdevice = device->physical_device;
return S_02880C_Z_EXPORT_ENABLE(ps && ps->info.ps.writes_z) |
S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps && ps->info.ps.writes_stencil) |
- S_02880C_KILL_ENABLE(ps && ps->info.ps.can_discard) |
- S_02880C_MASK_EXPORT_ENABLE(mask_export_enable) |
+ S_02880C_KILL_ENABLE(ps && ps->info.ps.can_discard) | S_02880C_MASK_EXPORT_ENABLE(mask_export_enable) |
S_02880C_CONSERVATIVE_Z_EXPORT(conservative_z_export) |
S_02880C_DEPTH_BEFORE_SHADER(ps && ps->info.ps.early_fragment_test) |
S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(ps && ps->info.ps.post_depth_coverage) |
S_02880C_EXEC_ON_HIER_FAIL(ps && ps->info.ps.writes_memory) |
- S_02880C_EXEC_ON_NOOP(ps && ps->info.ps.writes_memory) |
- S_02880C_DUAL_QUAD_DISABLE(disable_rbplus) |
+ S_02880C_EXEC_ON_NOOP(ps && ps->info.ps.writes_memory) | S_02880C_DUAL_QUAD_DISABLE(disable_rbplus) |
S_02880C_OVERRIDE_INTRINSIC_RATE_ENABLE(export_conflict_wa) |
S_02880C_OVERRIDE_INTRINSIC_RATE(export_conflict_wa ? 2 : 0);
}
static void
-gfx10_emit_ge_pc_alloc(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
- uint32_t oversub_pc_lines)
+gfx10_emit_ge_pc_alloc(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, uint32_t oversub_pc_lines)
{
- radeon_set_uconfig_reg(
- cs, R_030980_GE_PC_ALLOC,
- S_030980_OVERSUB_EN(oversub_pc_lines > 0) | S_030980_NUM_PC_LINES(oversub_pc_lines - 1));
+ radeon_set_uconfig_reg(cs, R_030980_GE_PC_ALLOC,
+ S_030980_OVERSUB_EN(oversub_pc_lines > 0) | S_030980_NUM_PC_LINES(oversub_pc_lines - 1));
}
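/* Encoding note: oversub_pc_lines == 0 clears OVERSUB_EN, and the wrapped
 * NUM_PC_LINES value (0 - 1) is presumably ignored by the hardware in that case;
 * any non-zero count enables oversubscription with count - 1 in the field.
 */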
struct radv_shader *
}
static bool
-radv_should_export_multiview(const struct radv_pipeline_stage *producer,
- const struct radv_pipeline_stage *consumer,
+radv_should_export_multiview(const struct radv_pipeline_stage *producer, const struct radv_pipeline_stage *consumer,
const struct radv_pipeline_key *pipeline_key)
{
/* Export the layer in the last VGT stage if multiview is used. When the next stage is unknown
}
static void
-radv_remove_point_size(const struct radv_pipeline_key *pipeline_key, nir_shader *producer,
- nir_shader *consumer)
+radv_remove_point_size(const struct radv_pipeline_key *pipeline_key, nir_shader *producer, nir_shader *consumer)
{
- if ((consumer->info.inputs_read & VARYING_BIT_PSIZ) ||
- !(producer->info.outputs_written & VARYING_BIT_PSIZ))
+ if ((consumer->info.inputs_read & VARYING_BIT_PSIZ) || !(producer->info.outputs_written & VARYING_BIT_PSIZ))
return;
/* Do not remove PSIZ if the shader uses XFB because it might be stored. */
/* Do not remove PSIZ if the rasterization primitive uses points. */
if (consumer->info.stage == MESA_SHADER_FRAGMENT &&
((producer->info.stage == MESA_SHADER_TESS_EVAL && producer->info.tess.point_mode) ||
- (producer->info.stage == MESA_SHADER_GEOMETRY &&
- producer->info.gs.output_primitive == MESA_PRIM_POINTS) ||
- (producer->info.stage == MESA_SHADER_MESH &&
- producer->info.mesh.primitive_type == MESA_PRIM_POINTS)))
+ (producer->info.stage == MESA_SHADER_GEOMETRY && producer->info.gs.output_primitive == MESA_PRIM_POINTS) ||
+ (producer->info.stage == MESA_SHADER_MESH && producer->info.mesh.primitive_type == MESA_PRIM_POINTS)))
return;
- nir_variable *var =
- nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
+ nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
assert(var);
/* Change PSIZ to a global variable which allows it to be DCE'd. */
if (pipeline_key->dynamic_color_write_mask)
return;
- nir_foreach_shader_out_variable(var, nir)
- {
+ nir_foreach_shader_out_variable (var, nir) {
int idx = var->data.location;
idx -= FRAG_RESULT_DATA0;
tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
- assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
- tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
+ assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED || tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
tcs_info->tess.spacing == tes_info->tess.spacing);
tes_info->tess.spacing |= tcs_info->tess.spacing;
}
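/* The |= merges above rely on the unspecified values being 0 in Mesa's shader_info:
 * the asserts guarantee the TCS and TES either agree or at most one of them
 * specifies a value, so OR-ing yields the specified one, e.g.
 * TESS_SPACING_FRACTIONAL_ODD | TESS_SPACING_UNSPECIFIED == TESS_SPACING_FRACTIONAL_ODD.
 */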
static void
-radv_pipeline_link_shaders(const struct radv_device *device, nir_shader *producer,
- nir_shader *consumer, const struct radv_pipeline_key *pipeline_key)
+radv_pipeline_link_shaders(const struct radv_device *device, nir_shader *producer, nir_shader *consumer,
+ const struct radv_pipeline_key *pipeline_key)
{
const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
bool progress;
if (pipeline_key->optimisations_disabled)
return;
- if (consumer->info.stage == MESA_SHADER_FRAGMENT &&
- producer->info.has_transform_feedback_varyings) {
+ if (consumer->info.stage == MESA_SHADER_FRAGMENT && producer->info.has_transform_feedback_varyings) {
nir_link_xfb_varyings(producer, consumer);
}
NIR_PASS(_, producer, nir_opt_combine_stores, nir_var_shader_out);
}
- if (consumer->info.stage == MESA_SHADER_GEOMETRY ||
- consumer->info.stage == MESA_SHADER_TESS_CTRL ||
+ if (consumer->info.stage == MESA_SHADER_GEOMETRY || consumer->info.stage == MESA_SHADER_TESS_CTRL ||
consumer->info.stage == MESA_SHADER_TESS_EVAL) {
NIR_PASS(_, consumer, nir_lower_io_to_vector, nir_var_shader_in);
}
static void
radv_pipeline_link_vs(const struct radv_device *device, struct radv_pipeline_stage *vs_stage,
- struct radv_pipeline_stage *next_stage,
- const struct radv_pipeline_key *pipeline_key)
+ struct radv_pipeline_stage *next_stage, const struct radv_pipeline_key *pipeline_key)
{
assert(vs_stage->nir->info.stage == MESA_SHADER_VERTEX);
}
if (next_stage && next_stage->nir->info.stage == MESA_SHADER_TESS_CTRL) {
- nir_linked_io_var_info vs2tcs =
- nir_assign_linked_io_var_locations(vs_stage->nir, next_stage->nir);
+ nir_linked_io_var_info vs2tcs = nir_assign_linked_io_var_locations(vs_stage->nir, next_stage->nir);
vs_stage->info.vs.num_linked_outputs = vs2tcs.num_linked_io_vars;
next_stage->info.tcs.num_linked_inputs = vs2tcs.num_linked_io_vars;
} else if (next_stage && next_stage->nir->info.stage == MESA_SHADER_GEOMETRY) {
- nir_linked_io_var_info vs2gs =
- nir_assign_linked_io_var_locations(vs_stage->nir, next_stage->nir);
+ nir_linked_io_var_info vs2gs = nir_assign_linked_io_var_locations(vs_stage->nir, next_stage->nir);
vs_stage->info.vs.num_linked_outputs = vs2gs.num_linked_io_vars;
next_stage->info.gs.num_linked_inputs = vs2gs.num_linked_io_vars;
} else {
- nir_foreach_shader_out_variable(var, vs_stage->nir)
- {
+ nir_foreach_shader_out_variable (var, vs_stage->nir) {
var->data.driver_location = var->data.location;
}
}
static void
radv_pipeline_link_tcs(const struct radv_device *device, struct radv_pipeline_stage *tcs_stage,
- struct radv_pipeline_stage *tes_stage,
- const struct radv_pipeline_key *pipeline_key)
+ struct radv_pipeline_stage *tes_stage, const struct radv_pipeline_key *pipeline_key)
{
assert(tcs_stage->nir->info.stage == MESA_SHADER_TESS_CTRL);
assert(tes_stage->nir->info.stage == MESA_SHADER_TESS_EVAL);
/* Copy TCS info into the TES info */
merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
- nir_linked_io_var_info tcs2tes =
- nir_assign_linked_io_var_locations(tcs_stage->nir, tes_stage->nir);
+ nir_linked_io_var_info tcs2tes = nir_assign_linked_io_var_locations(tcs_stage->nir, tes_stage->nir);
tcs_stage->info.tcs.num_linked_outputs = tcs2tes.num_linked_io_vars;
tcs_stage->info.tcs.num_linked_patch_outputs = tcs2tes.num_linked_patch_io_vars;
static void
radv_pipeline_link_tes(const struct radv_device *device, struct radv_pipeline_stage *tes_stage,
- struct radv_pipeline_stage *next_stage,
- const struct radv_pipeline_key *pipeline_key)
+ struct radv_pipeline_stage *next_stage, const struct radv_pipeline_key *pipeline_key)
{
assert(tes_stage->nir->info.stage == MESA_SHADER_TESS_EVAL);
}
if (next_stage && next_stage->nir->info.stage == MESA_SHADER_GEOMETRY) {
- nir_linked_io_var_info tes2gs =
- nir_assign_linked_io_var_locations(tes_stage->nir, next_stage->nir);
+ nir_linked_io_var_info tes2gs = nir_assign_linked_io_var_locations(tes_stage->nir, next_stage->nir);
tes_stage->info.tes.num_linked_outputs = tes2gs.num_linked_io_vars;
next_stage->info.gs.num_linked_inputs = tes2gs.num_linked_io_vars;
} else {
- nir_foreach_shader_out_variable(var, tes_stage->nir)
- {
+ nir_foreach_shader_out_variable (var, tes_stage->nir) {
var->data.driver_location = var->data.location;
}
}
static void
radv_pipeline_link_gs(const struct radv_device *device, struct radv_pipeline_stage *gs_stage,
- struct radv_pipeline_stage *fs_stage,
- const struct radv_pipeline_key *pipeline_key)
+ struct radv_pipeline_stage *fs_stage, const struct radv_pipeline_key *pipeline_key)
{
assert(gs_stage->nir->info.stage == MESA_SHADER_GEOMETRY);
radv_pipeline_link_shaders(device, gs_stage->nir, fs_stage->nir, pipeline_key);
}
- nir_foreach_shader_out_variable(var, gs_stage->nir)
- {
+ nir_foreach_shader_out_variable (var, gs_stage->nir) {
var->data.driver_location = var->data.location;
}
}
static void
radv_pipeline_link_task(const struct radv_device *device, struct radv_pipeline_stage *task_stage,
- struct radv_pipeline_stage *mesh_stage,
- const struct radv_pipeline_key *pipeline_key)
+ struct radv_pipeline_stage *mesh_stage, const struct radv_pipeline_key *pipeline_key)
{
assert(task_stage->nir->info.stage == MESA_SHADER_TASK);
assert(mesh_stage->nir->info.stage == MESA_SHADER_MESH);
static void
radv_pipeline_link_mesh(const struct radv_device *device, struct radv_pipeline_stage *mesh_stage,
- struct radv_pipeline_stage *fs_stage,
- const struct radv_pipeline_key *pipeline_key)
+ struct radv_pipeline_stage *fs_stage, const struct radv_pipeline_key *pipeline_key)
{
assert(mesh_stage->nir->info.stage == MESA_SHADER_MESH);
nir_foreach_shader_in_variable (var, fs_stage->nir) {
/* These variables are per-primitive when used with a mesh shader. */
- if (var->data.location == VARYING_SLOT_PRIMITIVE_ID ||
- var->data.location == VARYING_SLOT_VIEWPORT ||
+ if (var->data.location == VARYING_SLOT_PRIMITIVE_ID || var->data.location == VARYING_SLOT_VIEWPORT ||
var->data.location == VARYING_SLOT_LAYER) {
var->data.per_primitive = true;
}
/* ac_nir_lower_ngg ignores driver locations for mesh shaders, but we set them all to zero just to
* be on the safe side.
*/
- nir_foreach_shader_out_variable(var, mesh_stage->nir)
- {
+ nir_foreach_shader_out_variable (var, mesh_stage->nir) {
var->data.driver_location = 0;
}
}
static void
-radv_pipeline_link_fs(struct radv_pipeline_stage *fs_stage,
- const struct radv_pipeline_key *pipeline_key)
+radv_pipeline_link_fs(struct radv_pipeline_stage *fs_stage, const struct radv_pipeline_key *pipeline_key)
{
assert(fs_stage->nir->info.stage == MESA_SHADER_FRAGMENT);
radv_remove_color_exports(pipeline_key, fs_stage->nir);
- nir_foreach_shader_out_variable(var, fs_stage->nir)
- {
+ nir_foreach_shader_out_variable (var, fs_stage->nir) {
var->data.driver_location = var->data.location + var->data.index;
}
}
static bool
-radv_pipeline_needs_noop_fs(struct radv_graphics_pipeline *pipeline,
- const struct radv_pipeline_key *pipeline_key)
+radv_pipeline_needs_noop_fs(struct radv_graphics_pipeline *pipeline, const struct radv_pipeline_key *pipeline_key)
{
if (pipeline->base.type == RADV_PIPELINE_GRAPHICS &&
!(radv_pipeline_to_graphics(&pipeline->base)->active_stages & VK_SHADER_STAGE_FRAGMENT_BIT))
if (pipeline->base.type == RADV_PIPELINE_GRAPHICS_LIB &&
(pipeline_key->lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) &&
- !(radv_pipeline_to_graphics_lib(&pipeline->base)->base.active_stages &
- VK_SHADER_STAGE_FRAGMENT_BIT))
+ !(radv_pipeline_to_graphics_lib(&pipeline->base)->base.active_stages & VK_SHADER_STAGE_FRAGMENT_BIT))
return true;
return false;
{
bool fixup_derefs = false;
- nir_foreach_shader_out_variable(var, nir) {
+ nir_foreach_shader_out_variable (var, nir) {
if (var->data.always_active_io)
continue;
}
static void
-radv_graphics_pipeline_link(const struct radv_device *device,
- struct radv_graphics_pipeline *pipeline,
- const struct radv_pipeline_key *pipeline_key,
- struct radv_pipeline_stage *stages)
+radv_graphics_pipeline_link(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
+ const struct radv_pipeline_key *pipeline_key, struct radv_pipeline_stage *stages)
{
const bool noop_fs = radv_pipeline_needs_noop_fs(pipeline, pipeline_key);
}
struct radv_ps_epilog_key
-radv_generate_ps_epilog_key(const struct radv_device *device,
- const struct radv_ps_epilog_state *state, bool disable_mrt_compaction)
+radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ps_epilog_state *state,
+ bool disable_mrt_compaction)
{
unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0;
struct radv_ps_epilog_key key;
} else {
bool blend_enable = state->color_blend_enable & (0xfu << (i * 4));
- cf = radv_choose_spi_color_format(device, fmt, blend_enable,
- state->need_src_alpha & (1 << i));
+ cf = radv_choose_spi_color_format(device, fmt, blend_enable, state->need_src_alpha & (1 << i));
if (format_is_int8(fmt))
is_int8 |= 1 << i;
}
static struct radv_ps_epilog_key
-radv_pipeline_generate_ps_epilog_key(const struct radv_device *device,
- const struct vk_graphics_pipeline_state *state,
+radv_pipeline_generate_ps_epilog_key(const struct radv_device *device, const struct vk_graphics_pipeline_state *state,
bool disable_mrt_compaction)
{
struct radv_ps_epilog_state ps_epilog = {0};
}
if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA ||
- srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
- dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
- srcRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA ||
- dstRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA)
+ srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
+ srcRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA || dstRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA)
ps_epilog.need_src_alpha |= 1 << i;
}
}
}
static struct radv_pipeline_key
-radv_generate_graphics_pipeline_key(const struct radv_device *device,
- const struct radv_graphics_pipeline *pipeline,
+radv_generate_graphics_pipeline_key(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const struct vk_graphics_pipeline_state *state,
VkGraphicsPipelineLibraryFlagBitsEXT lib_flags)
{
const struct radv_physical_device *pdevice = device->physical_device;
- struct radv_pipeline_key key =
- radv_generate_pipeline_key(device, &pipeline->base, pCreateInfo->flags);
+ struct radv_pipeline_key key = radv_generate_pipeline_key(device, &pipeline->base, pCreateInfo->flags);
key.lib_flags = lib_flags;
key.has_multiview_view_index = state->rp ? !!state->rp->view_mask : 0;
const struct ac_vtx_format_info *vtx_info =
ac_get_vtx_format_info(pdevice->rad_info.gfx_level, pdevice->rad_info.family, format);
- unsigned attrib_align =
- vtx_info->chan_byte_size ? vtx_info->chan_byte_size : vtx_info->element_size;
+ unsigned attrib_align = vtx_info->chan_byte_size ? vtx_info->chan_byte_size : vtx_info->element_size;
/* If offset is misaligned, then the buffer offset must be too. Just skip updating
* vertex_binding_align in this case.
*/
if (offset % attrib_align == 0) {
- key.vs.vertex_binding_align[binding] =
- MAX2(key.vs.vertex_binding_align[binding], attrib_align);
+ key.vs.vertex_binding_align[binding] = MAX2(key.vs.vertex_binding_align[binding], attrib_align);
}
}
}
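/* Worked example of the update above, assuming a tightly packed
 * VK_FORMAT_R32G32B32_SFLOAT attribute: chan_byte_size is 4, so attrib_align is 4;
 * an offset of 12 is 4-aligned and raises vertex_binding_align[binding] to at
 * least 4, while an offset of 2 would leave it untouched.
 */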
if (state->ms) {
key.ps.sample_shading_enable = state->ms->sample_shading_enable;
- if (!(pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZATION_SAMPLES) &&
- state->ms->rasterization_samples > 1) {
+ if (!(pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZATION_SAMPLES) && state->ms->rasterization_samples > 1) {
key.ps.num_samples = state->ms->rasterization_samples;
}
}
}
if (device->physical_device->rad_info.gfx_level >= GFX10 && state->rs) {
- key.vs.provoking_vtx_last =
- state->rs->provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;
+ key.vs.provoking_vtx_last = state->rs->provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;
}
if (device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE)
* - with GPL, it's compiled without the fragment output interface
* - it's compiled on demand because some dynamic states are enabled
*/
- key.ps.has_epilog =
- (pipeline->active_stages & VK_SHADER_STAGE_FRAGMENT_BIT) &&
- (((lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) &&
- !(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)) ||
- key.ps.dynamic_ps_epilog);
+ key.ps.has_epilog = (pipeline->active_stages & VK_SHADER_STAGE_FRAGMENT_BIT) &&
+ (((lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) &&
+ !(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)) ||
+ key.ps.dynamic_ps_epilog);
/* Disable MRT compaction when it's not possible to know both the written color outputs and the
* color blend attachments.
*/
bool disable_mrt_compaction =
- key.ps.has_epilog ||
- ((lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) &&
- !(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT));
+ key.ps.has_epilog || ((lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) &&
+ !(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT));
- key.ps.epilog =
- radv_pipeline_generate_ps_epilog_key(device, state, disable_mrt_compaction);
+ key.ps.epilog = radv_pipeline_generate_ps_epilog_key(device, state, disable_mrt_compaction);
- key.dynamic_patch_control_points =
- !!(pipeline->dynamic_states & RADV_DYNAMIC_PATCH_CONTROL_POINTS);
+ key.dynamic_patch_control_points = !!(pipeline->dynamic_states & RADV_DYNAMIC_PATCH_CONTROL_POINTS);
- key.dynamic_rasterization_samples =
- !!(pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZATION_SAMPLES) ||
- (!!(pipeline->active_stages & VK_SHADER_STAGE_FRAGMENT_BIT) && !state->ms);
+ key.dynamic_rasterization_samples = !!(pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZATION_SAMPLES) ||
+ (!!(pipeline->active_stages & VK_SHADER_STAGE_FRAGMENT_BIT) && !state->ms);
key.dynamic_color_write_mask = !!(pipeline->dynamic_states & RADV_DYNAMIC_COLOR_WRITE_MASK);
static void
radv_fill_shader_info_ngg(struct radv_device *device, struct radv_graphics_pipeline *pipeline,
- const struct radv_pipeline_key *pipeline_key,
- struct radv_pipeline_stage *stages)
+ const struct radv_pipeline_key *pipeline_key, struct radv_pipeline_stage *stages)
{
if (pipeline_key->use_ngg) {
if (stages[MESA_SHADER_TESS_CTRL].nir) {
stages[MESA_SHADER_MESH].info.is_ngg = true;
}
- if (device->physical_device->rad_info.gfx_level < GFX11 &&
- stages[MESA_SHADER_TESS_CTRL].nir && stages[MESA_SHADER_GEOMETRY].nir &&
+ if (device->physical_device->rad_info.gfx_level < GFX11 && stages[MESA_SHADER_TESS_CTRL].nir &&
+ stages[MESA_SHADER_GEOMETRY].nir &&
stages[MESA_SHADER_GEOMETRY].nir->info.gs.invocations *
stages[MESA_SHADER_GEOMETRY].nir->info.gs.vertices_out >
256) {
stages[MESA_SHADER_TESS_EVAL].info.is_ngg = false;
}
- bool uses_xfb = pipeline->last_vgt_api_stage != MESA_SHADER_NONE &&
- stages[pipeline->last_vgt_api_stage].nir &&
+ bool uses_xfb = pipeline->last_vgt_api_stage != MESA_SHADER_NONE && stages[pipeline->last_vgt_api_stage].nir &&
stages[pipeline->last_vgt_api_stage].nir->xfb_info;
if (!device->physical_device->use_ngg_streamout && uses_xfb) {
}
static bool
-radv_consider_force_vrs(const struct radv_device *device,
- const struct radv_graphics_pipeline *pipeline,
+radv_consider_force_vrs(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline,
const struct radv_pipeline_stage *stages)
{
if (!device->force_vrs_enabled)
return false;
- if (pipeline->last_vgt_api_stage != MESA_SHADER_VERTEX &&
- pipeline->last_vgt_api_stage != MESA_SHADER_TESS_EVAL &&
+ if (pipeline->last_vgt_api_stage != MESA_SHADER_VERTEX && pipeline->last_vgt_api_stage != MESA_SHADER_TESS_EVAL &&
pipeline->last_vgt_api_stage != MESA_SHADER_GEOMETRY)
return false;
static void
radv_fill_shader_info(struct radv_device *device, struct radv_graphics_pipeline *pipeline,
- struct radv_pipeline_layout *pipeline_layout,
- const struct radv_pipeline_key *pipeline_key,
- struct radv_pipeline_stage *stages,
- VkShaderStageFlagBits active_nir_stages)
+ struct radv_pipeline_layout *pipeline_layout, const struct radv_pipeline_key *pipeline_key,
+ struct radv_pipeline_stage *stages, VkShaderStageFlagBits active_nir_stages)
{
bool consider_force_vrs = radv_consider_force_vrs(device, pipeline, stages);
- radv_foreach_stage (i, active_nir_stages) {
+ radv_foreach_stage(i, active_nir_stages)
+ {
gl_shader_stage next_stage = radv_get_next_stage(i, active_nir_stages);
- radv_nir_shader_info_pass(
- device, stages[i].nir, next_stage, pipeline_layout, pipeline_key, pipeline->base.type,
- i == pipeline->last_vgt_api_stage && consider_force_vrs, &stages[i].info);
+ radv_nir_shader_info_pass(device, stages[i].nir, next_stage, pipeline_layout, pipeline_key, pipeline->base.type,
+ i == pipeline->last_vgt_api_stage && consider_force_vrs, &stages[i].info);
}
radv_nir_shader_info_link(device, pipeline_key, stages);
static void
radv_declare_pipeline_args(struct radv_device *device, struct radv_pipeline_stage *stages,
- const struct radv_pipeline_key *pipeline_key,
- VkShaderStageFlagBits active_nir_stages)
+ const struct radv_pipeline_key *pipeline_key, VkShaderStageFlagBits active_nir_stages)
{
enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
if (gfx_level >= GFX9 && stages[MESA_SHADER_TESS_CTRL].nir) {
- radv_declare_shader_args(device, pipeline_key, &stages[MESA_SHADER_TESS_CTRL].info,
- MESA_SHADER_TESS_CTRL, MESA_SHADER_VERTEX, RADV_SHADER_TYPE_DEFAULT,
- &stages[MESA_SHADER_TESS_CTRL].args);
- stages[MESA_SHADER_TESS_CTRL].info.user_sgprs_locs =
- stages[MESA_SHADER_TESS_CTRL].args.user_sgprs_locs;
+ radv_declare_shader_args(device, pipeline_key, &stages[MESA_SHADER_TESS_CTRL].info, MESA_SHADER_TESS_CTRL,
+ MESA_SHADER_VERTEX, RADV_SHADER_TYPE_DEFAULT, &stages[MESA_SHADER_TESS_CTRL].args);
+ stages[MESA_SHADER_TESS_CTRL].info.user_sgprs_locs = stages[MESA_SHADER_TESS_CTRL].args.user_sgprs_locs;
stages[MESA_SHADER_TESS_CTRL].info.inline_push_constant_mask =
stages[MESA_SHADER_TESS_CTRL].args.ac.inline_push_const_mask;
- stages[MESA_SHADER_VERTEX].info.user_sgprs_locs =
- stages[MESA_SHADER_TESS_CTRL].info.user_sgprs_locs;
+ stages[MESA_SHADER_VERTEX].info.user_sgprs_locs = stages[MESA_SHADER_TESS_CTRL].info.user_sgprs_locs;
stages[MESA_SHADER_VERTEX].info.inline_push_constant_mask =
stages[MESA_SHADER_TESS_CTRL].info.inline_push_constant_mask;
stages[MESA_SHADER_VERTEX].args = stages[MESA_SHADER_TESS_CTRL].args;
}
if (gfx_level >= GFX9 && stages[MESA_SHADER_GEOMETRY].nir) {
- gl_shader_stage pre_stage =
- stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
- radv_declare_shader_args(device, pipeline_key, &stages[MESA_SHADER_GEOMETRY].info,
- MESA_SHADER_GEOMETRY, pre_stage, RADV_SHADER_TYPE_DEFAULT,
- &stages[MESA_SHADER_GEOMETRY].args);
- stages[MESA_SHADER_GEOMETRY].info.user_sgprs_locs =
- stages[MESA_SHADER_GEOMETRY].args.user_sgprs_locs;
+ gl_shader_stage pre_stage = stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
+ radv_declare_shader_args(device, pipeline_key, &stages[MESA_SHADER_GEOMETRY].info, MESA_SHADER_GEOMETRY,
+ pre_stage, RADV_SHADER_TYPE_DEFAULT, &stages[MESA_SHADER_GEOMETRY].args);
+ stages[MESA_SHADER_GEOMETRY].info.user_sgprs_locs = stages[MESA_SHADER_GEOMETRY].args.user_sgprs_locs;
stages[MESA_SHADER_GEOMETRY].info.inline_push_constant_mask =
stages[MESA_SHADER_GEOMETRY].args.ac.inline_push_const_mask;
stages[pre_stage].info.user_sgprs_locs = stages[MESA_SHADER_GEOMETRY].info.user_sgprs_locs;
- stages[pre_stage].info.inline_push_constant_mask =
- stages[MESA_SHADER_GEOMETRY].info.inline_push_constant_mask;
+ stages[pre_stage].info.inline_push_constant_mask = stages[MESA_SHADER_GEOMETRY].info.inline_push_constant_mask;
stages[pre_stage].args = stages[MESA_SHADER_GEOMETRY].args;
active_nir_stages &= ~(1 << pre_stage);
active_nir_stages &= ~(1 << MESA_SHADER_GEOMETRY);
}
u_foreach_bit (i, active_nir_stages) {
- radv_declare_shader_args(device, pipeline_key, &stages[i].info, i, MESA_SHADER_NONE,
- RADV_SHADER_TYPE_DEFAULT, &stages[i].args);
+ radv_declare_shader_args(device, pipeline_key, &stages[i].info, i, MESA_SHADER_NONE, RADV_SHADER_TYPE_DEFAULT,
+ &stages[i].args);
stages[i].info.user_sgprs_locs = stages[i].args.user_sgprs_locs;
stages[i].info.inline_push_constant_mask = stages[i].args.ac.inline_push_const_mask;
}
static struct radv_shader *
radv_pipeline_create_gs_copy_shader(struct radv_device *device, struct radv_pipeline *pipeline,
- struct vk_pipeline_cache *cache,
- struct radv_pipeline_stage *stages,
+ struct vk_pipeline_cache *cache, struct radv_pipeline_stage *stages,
const struct radv_pipeline_key *pipeline_key,
- const struct radv_pipeline_layout *pipeline_layout,
- bool keep_executable_info, bool keep_statistic_info,
- struct radv_shader_binary **gs_copy_binary)
+ const struct radv_pipeline_layout *pipeline_layout, bool keep_executable_info,
+ bool keep_statistic_info, struct radv_shader_binary **gs_copy_binary)
{
const struct radv_shader_info *gs_info = &stages[MESA_SHADER_GEOMETRY].info;
ac_nir_gs_output_info output_info = {
};
nir_shader *nir = ac_nir_create_gs_copy_shader(
stages[MESA_SHADER_GEOMETRY].nir, device->physical_device->rad_info.gfx_level,
- gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask,
- gs_info->outinfo.vs_output_param_offset, gs_info->outinfo.param_exports, false, false,
- gs_info->force_vrs_per_vertex, &output_info);
+ gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask, gs_info->outinfo.vs_output_param_offset,
+ gs_info->outinfo.param_exports, false, false, gs_info->force_vrs_per_vertex, &output_info);
nir_validate_shader(nir, "after ac_nir_create_gs_copy_shader");
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
.shader_sha1 = {0},
};
radv_nir_shader_info_init(&gs_copy_stage.info);
- radv_nir_shader_info_pass(device, nir, MESA_SHADER_FRAGMENT, pipeline_layout, pipeline_key,
- pipeline->type, false, &gs_copy_stage.info);
+ radv_nir_shader_info_pass(device, nir, MESA_SHADER_FRAGMENT, pipeline_layout, pipeline_key, pipeline->type, false,
+ &gs_copy_stage.info);
gs_copy_stage.info.wave_size = 64; /* Wave32 not supported. */
gs_copy_stage.info.workgroup_size = 64; /* HW VS: separate waves, no workgroups */
gs_copy_stage.info.so = gs_info->so;
gs_copy_stage.info.outinfo = gs_info->outinfo;
gs_copy_stage.info.force_vrs_per_vertex = gs_info->force_vrs_per_vertex;
- radv_declare_shader_args(device, pipeline_key, &gs_copy_stage.info, MESA_SHADER_VERTEX,
- MESA_SHADER_NONE, RADV_SHADER_TYPE_GS_COPY, &gs_copy_stage.args);
+ radv_declare_shader_args(device, pipeline_key, &gs_copy_stage.info, MESA_SHADER_VERTEX, MESA_SHADER_NONE,
+ RADV_SHADER_TYPE_GS_COPY, &gs_copy_stage.args);
gs_copy_stage.info.user_sgprs_locs = gs_copy_stage.args.user_sgprs_locs;
gs_copy_stage.info.inline_push_constant_mask = gs_copy_stage.args.ac.inline_push_const_mask;
- NIR_PASS_V(nir, radv_nir_lower_abi, device->physical_device->rad_info.gfx_level,
- &gs_copy_stage.info, &gs_copy_stage.args, pipeline_key,
- device->physical_device->rad_info.address32_hi);
+ NIR_PASS_V(nir, radv_nir_lower_abi, device->physical_device->rad_info.gfx_level, &gs_copy_stage.info,
+ &gs_copy_stage.args, pipeline_key, device->physical_device->rad_info.address32_hi);
struct radv_pipeline_key key = {
.optimisations_disabled = pipeline_key->optimisations_disabled,
radv_pipeline_nir_to_asm(struct radv_device *device, struct radv_graphics_pipeline *pipeline,
struct vk_pipeline_cache *cache, struct radv_pipeline_stage *stages,
const struct radv_pipeline_key *pipeline_key,
- const struct radv_pipeline_layout *pipeline_layout,
- bool keep_executable_info, bool keep_statistic_info,
- VkShaderStageFlagBits active_nir_stages,
- struct radv_shader_binary **binaries,
- struct radv_shader_binary **gs_copy_binary)
+ const struct radv_pipeline_layout *pipeline_layout, bool keep_executable_info,
+ bool keep_statistic_info, VkShaderStageFlagBits active_nir_stages,
+ struct radv_shader_binary **binaries, struct radv_shader_binary **gs_copy_binary)
{
for (int s = MESA_VULKAN_SHADER_STAGES - 1; s >= 0; s--) {
if (!(active_nir_stages & (1 << s)) || pipeline->base.shaders[s])
int64_t stage_start = os_time_get_nano();
- pipeline->base.shaders[s] =
- radv_shader_nir_to_asm(device, cache, &stages[s], shaders, shader_count, pipeline_key,
- keep_executable_info, keep_statistic_info, &binaries[s]);
+ pipeline->base.shaders[s] = radv_shader_nir_to_asm(device, cache, &stages[s], shaders, shader_count, pipeline_key,
+ keep_executable_info, keep_statistic_info, &binaries[s]);
if (s == MESA_SHADER_GEOMETRY && !stages[s].info.is_ngg) {
- pipeline->base.gs_copy_shader = radv_pipeline_create_gs_copy_shader(
- device, &pipeline->base, cache, stages, pipeline_key, pipeline_layout,
- keep_executable_info, keep_statistic_info, gs_copy_binary);
+ pipeline->base.gs_copy_shader =
+ radv_pipeline_create_gs_copy_shader(device, &pipeline->base, cache, stages, pipeline_key, pipeline_layout,
+ keep_executable_info, keep_statistic_info, gs_copy_binary);
}
stages[s].feedback.duration += os_time_get_nano() - stage_start;
static void
radv_pipeline_get_nir(struct radv_device *device, struct radv_graphics_pipeline *pipeline,
- struct radv_pipeline_stage *stages,
- const struct radv_pipeline_key *pipeline_key)
+ struct radv_pipeline_stage *stages, const struct radv_pipeline_key *pipeline_key)
{
for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
if (!stages[s].entrypoint)
/* NIR might already have been imported from a library. */
if (!stages[s].nir) {
- stages[s].nir =
- radv_shader_spirv_to_nir(device, &stages[s], pipeline_key, pipeline->base.is_internal);
+ stages[s].nir = radv_shader_spirv_to_nir(device, &stages[s], pipeline_key, pipeline->base.is_internal);
}
stages[s].feedback.duration += os_time_get_nano() - stage_start;
}
static void
-radv_pipeline_retain_shaders(struct radv_graphics_lib_pipeline *gfx_pipeline_lib,
- struct radv_pipeline_stage *stages)
+radv_pipeline_retain_shaders(struct radv_graphics_lib_pipeline *gfx_pipeline_lib, struct radv_pipeline_stage *stages)
{
for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
if (!stages[s].entrypoint)
blob_finish_get_buffer(&blob, &gfx_pipeline_lib->retained_shaders[s].serialized_nir,
&gfx_pipeline_lib->retained_shaders[s].serialized_nir_size);
- memcpy(gfx_pipeline_lib->retained_shaders[s].shader_sha1, stages[s].shader_sha1,
- sizeof(stages[s].shader_sha1));
+ memcpy(gfx_pipeline_lib->retained_shaders[s].shader_sha1, stages[s].shader_sha1, sizeof(stages[s].shader_sha1));
stages[s].feedback.duration += os_time_get_nano() - stage_start;
}
}
static void
-radv_pipeline_import_retained_shaders(const struct radv_device *device,
- struct radv_graphics_pipeline *pipeline,
- struct radv_graphics_lib_pipeline *lib,
- struct radv_pipeline_stage *stages)
+radv_pipeline_import_retained_shaders(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
+ struct radv_graphics_lib_pipeline *lib, struct radv_pipeline_stage *stages)
{
/* Import the stages (SPIR-V only in case of cache hits). */
for (uint32_t i = 0; i < lib->stage_count; i++) {
stages[s].stage = s;
stages[s].nir = nir_deserialize(NULL, options, &blob_reader);
stages[s].entrypoint = nir_shader_get_entrypoint(stages[s].nir)->function->name;
- memcpy(stages[s].shader_sha1, lib->retained_shaders[s].shader_sha1,
- sizeof(stages[s].shader_sha1));
+ memcpy(stages[s].shader_sha1, lib->retained_shaders[s].shader_sha1, sizeof(stages[s].shader_sha1));
stages[s].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
}
static void
-radv_pipeline_load_retained_shaders(const struct radv_device *device,
- struct radv_graphics_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- struct radv_pipeline_stage *stages)
+radv_pipeline_load_retained_shaders(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo, struct radv_pipeline_stage *stages)
{
const VkPipelineLibraryCreateInfoKHR *libs_info =
vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
- const bool link_optimize =
- (pCreateInfo->flags & VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT) != 0;
+ const bool link_optimize = (pCreateInfo->flags & VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT) != 0;
/* Nothing to load if no libs are imported. */
if (!libs_info)
for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
RADV_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
- struct radv_graphics_lib_pipeline *gfx_pipeline_lib =
- radv_pipeline_to_graphics_lib(pipeline_lib);
+ struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);
radv_pipeline_import_retained_shaders(device, pipeline, gfx_pipeline_lib, stages);
}
if (pipeline->base.type == RADV_PIPELINE_GRAPHICS) {
needs_ps_epilog = pipeline->base.shaders[MESA_SHADER_FRAGMENT] &&
- pipeline->base.shaders[MESA_SHADER_FRAGMENT]->info.ps.has_epilog &&
- !pipeline->ps_epilog;
+ pipeline->base.shaders[MESA_SHADER_FRAGMENT]->info.ps.has_epilog && !pipeline->ps_epilog;
} else {
assert(pipeline->base.type == RADV_PIPELINE_GRAPHICS_LIB);
- needs_ps_epilog =
- (lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) &&
- !(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT);
+ needs_ps_epilog = (lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) &&
+ !(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT);
}
if (needs_ps_epilog) {
- pipeline->ps_epilog =
- radv_create_ps_epilog(device, &pipeline_key->ps.epilog, ps_epilog_binary);
+ pipeline->ps_epilog = radv_create_ps_epilog(device, &pipeline_key->ps.epilog, ps_epilog_binary);
if (!pipeline->ps_epilog)
return false;
}
}
static unsigned
-radv_get_rasterization_prim(const struct radv_pipeline_stage *stages,
- const struct radv_pipeline_key *pipeline_key)
+radv_get_rasterization_prim(const struct radv_pipeline_stage *stages, const struct radv_pipeline_key *pipeline_key)
{
unsigned rast_prim;
return -1;
if (stages[MESA_SHADER_GEOMETRY].nir) {
- rast_prim =
- si_conv_gl_prim_to_gs_out(stages[MESA_SHADER_GEOMETRY].nir->info.gs.output_primitive);
+ rast_prim = si_conv_gl_prim_to_gs_out(stages[MESA_SHADER_GEOMETRY].nir->info.gs.output_primitive);
} else if (stages[MESA_SHADER_TESS_EVAL].nir) {
if (stages[MESA_SHADER_TESS_EVAL].nir->info.tess.point_mode) {
rast_prim = V_028A6C_POINTLIST;
} else {
- rast_prim =
- si_conv_tess_prim_to_gs_out(stages[MESA_SHADER_TESS_EVAL].nir->info.tess._primitive_mode);
+ rast_prim = si_conv_tess_prim_to_gs_out(stages[MESA_SHADER_TESS_EVAL].nir->info.tess._primitive_mode);
}
} else if (stages[MESA_SHADER_MESH].nir) {
rast_prim = si_conv_gl_prim_to_gs_out(stages[MESA_SHADER_MESH].nir->info.mesh.primitive_type);
}
static bool
-radv_skip_graphics_pipeline_compile(const struct radv_device *device,
- const struct radv_graphics_pipeline *pipeline,
- VkGraphicsPipelineLibraryFlagBitsEXT lib_flags,
- bool fast_linking_enabled)
+radv_skip_graphics_pipeline_compile(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline,
+ VkGraphicsPipelineLibraryFlagBitsEXT lib_flags, bool fast_linking_enabled)
{
VkShaderStageFlagBits binary_stages = 0;
return false;
/* Do not skip when the PS epilog needs to be compiled. */
- if (!radv_pipeline_needs_dynamic_ps_epilog(pipeline) &&
- pipeline->base.shaders[MESA_SHADER_FRAGMENT] &&
+ if (!radv_pipeline_needs_dynamic_ps_epilog(pipeline) && pipeline->base.shaders[MESA_SHADER_FRAGMENT] &&
pipeline->base.shaders[MESA_SHADER_FRAGMENT]->info.ps.has_epilog && !pipeline->ps_epilog)
return false;
}
static VkResult
-radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- struct radv_pipeline_layout *pipeline_layout,
- struct radv_device *device, struct vk_pipeline_cache *cache,
- const struct radv_pipeline_key *pipeline_key,
- VkGraphicsPipelineLibraryFlagBitsEXT lib_flags,
- bool fast_linking_enabled)
+radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ struct radv_pipeline_layout *pipeline_layout, struct radv_device *device,
+ struct vk_pipeline_cache *cache, const struct radv_pipeline_key *pipeline_key,
+ VkGraphicsPipelineLibraryFlagBitsEXT lib_flags, bool fast_linking_enabled)
{
struct radv_shader_binary *binaries[MESA_VULKAN_SHADER_STAGES] = {NULL};
struct radv_shader_binary *gs_copy_binary = NULL;
};
bool skip_shaders_cache = false;
VkResult result = VK_SUCCESS;
- const bool retain_shaders =
- !!(pCreateInfo->flags & VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT);
+ const bool retain_shaders = !!(pCreateInfo->flags & VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT);
int64_t pipeline_start = os_time_get_nano();
}
bool found_in_application_cache = true;
- if (!skip_shaders_cache && radv_pipeline_cache_search(device, cache, &pipeline->base, hash,
- &found_in_application_cache)) {
+ if (!skip_shaders_cache &&
+ radv_pipeline_cache_search(device, cache, &pipeline->base, hash, &found_in_application_cache)) {
if (found_in_application_cache)
- pipeline_feedback.flags |=
- VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
+ pipeline_feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
if (retain_shaders) {
/* For graphics pipeline libraries created with the RETAIN_LINK_TIME_OPTIMIZATION flag, we
* need to retain the stage info because we can't know if the LTO pipelines will
* be found in the shaders cache.
*/
- struct radv_graphics_lib_pipeline *gfx_pipeline_lib =
- radv_pipeline_to_graphics_lib(&pipeline->base);
+ struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(&pipeline->base);
- gfx_pipeline_lib->stages = radv_copy_shader_stage_create_info(
- device, pCreateInfo->stageCount, pCreateInfo->pStages, gfx_pipeline_lib->mem_ctx);
+ gfx_pipeline_lib->stages = radv_copy_shader_stage_create_info(device, pCreateInfo->stageCount,
+ pCreateInfo->pStages, gfx_pipeline_lib->mem_ctx);
if (!gfx_pipeline_lib->stages)
return VK_ERROR_OUT_OF_HOST_MEMORY;
bool optimize_conservatively = pipeline_key->optimisations_disabled;
- radv_foreach_stage (i, active_nir_stages) {
+ radv_foreach_stage(i, active_nir_stages)
+ {
radv_nir_shader_info_init(&stages[i].info);
}
radv_fill_shader_info_ngg(device, pipeline, pipeline_key, stages);
if (stages[MESA_SHADER_GEOMETRY].nir) {
- gl_shader_stage pre_stage =
- stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
+ gl_shader_stage pre_stage = stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
unsigned nir_gs_flags = nir_lower_gs_intrinsics_per_stream;
if (stages[pre_stage].info.is_ngg) {
if (stages[MESA_SHADER_FRAGMENT].nir) {
unsigned rast_prim = radv_get_rasterization_prim(stages, pipeline_key);
- NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_fs_barycentric, pipeline_key,
- rast_prim);
+ NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_fs_barycentric, pipeline_key, rast_prim);
}
- radv_foreach_stage (i, active_nir_stages) {
+ radv_foreach_stage(i, active_nir_stages)
+ {
int64_t stage_start = os_time_get_nano();
radv_optimize_nir(stages[i].nir, optimize_conservatively);
radv_nir_lower_poly_line_smooth(stages[MESA_SHADER_FRAGMENT].nir, pipeline_key);
}
- radv_fill_shader_info(device, pipeline, pipeline_layout, pipeline_key, stages,
- active_nir_stages);
+ radv_fill_shader_info(device, pipeline, pipeline_layout, pipeline_key, stages, active_nir_stages);
radv_declare_pipeline_args(device, stages, pipeline_key, active_nir_stages);
- radv_foreach_stage (i, active_nir_stages) {
+ radv_foreach_stage(i, active_nir_stages)
+ {
int64_t stage_start = os_time_get_nano();
- radv_postprocess_nir(device, pipeline_layout, pipeline_key, pipeline->last_vgt_api_stage,
- &stages[i]);
+ radv_postprocess_nir(device, pipeline_layout, pipeline_key, pipeline->last_vgt_api_stage, &stages[i]);
stages[i].feedback.duration += os_time_get_nano() - stage_start;
}
/* Compile NIR shaders to AMD assembly. */
- radv_pipeline_nir_to_asm(device, pipeline, cache, stages, pipeline_key, pipeline_layout,
- keep_executable_info, keep_statistic_info, active_nir_stages, binaries,
- &gs_copy_binary);
+ radv_pipeline_nir_to_asm(device, pipeline, cache, stages, pipeline_key, pipeline_layout, keep_executable_info,
+ keep_statistic_info, active_nir_stages, binaries, &gs_copy_binary);
if (!radv_pipeline_create_ps_epilog(device, pipeline, pipeline_key, lib_flags, &ps_epilog_binary))
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
if (libs_info) {
for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
RADV_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
- struct radv_graphics_lib_pipeline *gfx_pipeline_lib =
- radv_pipeline_to_graphics_lib(pipeline_lib);
+ struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);
if (!gfx_pipeline_lib->base.active_stages)
continue;
- radv_foreach_stage(s, gfx_pipeline_lib->base.active_stages) {
- creation_feedback->pPipelineStageCreationFeedbacks[num_feedbacks++] =
- stages[s].feedback;
+ radv_foreach_stage(s, gfx_pipeline_lib->base.active_stages)
+ {
+ creation_feedback->pPipelineStageCreationFeedbacks[num_feedbacks++] = stages[s].feedback;
}
}
}
}
static void
-radv_pipeline_emit_blend_state(struct radeon_cmdbuf *ctx_cs,
- const struct radv_graphics_pipeline *pipeline,
+radv_pipeline_emit_blend_state(struct radeon_cmdbuf *ctx_cs, const struct radv_graphics_pipeline *pipeline,
const struct radv_blend_state *blend)
{
struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
}
static void
-radv_emit_hw_vs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs, const struct radv_shader *shader)
+radv_emit_hw_vs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ const struct radv_shader *shader)
{
const struct radv_physical_device *pdevice = device->physical_device;
uint64_t va = radv_shader_get_va(shader);
cull_dist_mask = outinfo->cull_dist_mask;
total_mask = clip_dist_mask | cull_dist_mask;
- bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer ||
- outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate;
+ bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_viewport_index ||
+ outinfo->writes_primitive_shading_rate;
unsigned spi_vs_out_config, nparams;
/* VS is required to export at least one param. */
radeon_set_context_reg(
ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT,
S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
- S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP
- : V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP
- : V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP
- : V_02870C_SPI_SHADER_NONE));
+ S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) |
+ S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) |
+ S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE));
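/* Sketch of the mapping above: only position exports that actually exist get a
 * 4-component format; e.g. with outinfo->pos_exports == 2, POS0/POS1 use
 * SPI_SHADER_4COMP while POS2/POS3 stay SPI_SHADER_NONE. */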
radeon_set_context_reg(
ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL,
S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) |
S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
- S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(
- misc_vec_ena || (pdevice->rad_info.gfx_level >= GFX10_3 && outinfo->pos_exports > 1)) |
+ S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena ||
+ (pdevice->rad_info.gfx_level >= GFX10_3 && outinfo->pos_exports > 1)) |
S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
- S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) | total_mask << 8 |
- clip_dist_mask);
+ S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) | total_mask << 8 | clip_dist_mask);
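/* Worked example (values assumed): 4 clip + 2 cull distances give
 * clip_dist_mask = 0x0f, cull_dist_mask = 0x30, total_mask = 0x3f, so both
 * CCDIST vector enables are set, total_mask lands in bits 8..15 and
 * clip_dist_mask in bits 0..7. */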
if (pdevice->rad_info.gfx_level <= GFX8)
radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF, outinfo->writes_viewport_index);
unsigned late_alloc_wave64, cu_mask;
- ac_compute_late_alloc(&pdevice->rad_info, false, false,
- shader->config.scratch_bytes_per_wave > 0, &late_alloc_wave64, &cu_mask);
+ ac_compute_late_alloc(&pdevice->rad_info, false, false, shader->config.scratch_bytes_per_wave > 0,
+ &late_alloc_wave64, &cu_mask);
if (pdevice->rad_info.gfx_level >= GFX7) {
- radeon_set_sh_reg_idx(pdevice, cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 3,
- ac_apply_cu_en(S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F),
- C_00B118_CU_EN, 0, &pdevice->rad_info));
+ radeon_set_sh_reg_idx(
+ pdevice, cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 3,
+ ac_apply_cu_en(S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F), C_00B118_CU_EN, 0, &pdevice->rad_info));
radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64));
}
if (pdevice->rad_info.gfx_level >= GFX10) {
}
static void
-radv_emit_hw_ngg(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs, const struct radv_shader *es,
- const struct radv_shader *shader)
+radv_emit_hw_ngg(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ const struct radv_shader *es, const struct radv_shader *shader)
{
const struct radv_physical_device *pdevice = device->physical_device;
uint64_t va = radv_shader_get_va(shader);
- gl_shader_stage es_type =
- shader->info.stage == MESA_SHADER_GEOMETRY ? shader->info.gs.es_type : shader->info.stage;
+ gl_shader_stage es_type = shader->info.stage == MESA_SHADER_GEOMETRY ? shader->info.gs.es_type : shader->info.stage;
const struct gfx10_ngg_info *ngg_state = &shader->info.ngg_info;
radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
cull_dist_mask = outinfo->cull_dist_mask;
total_mask = clip_dist_mask | cull_dist_mask;
- bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer ||
- outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate;
+ bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_viewport_index ||
+ outinfo->writes_primitive_shading_rate;
bool es_enable_prim_id = outinfo->export_prim_id || (es && es->info.uses_prim_id);
bool break_wave_at_eoi = false;
unsigned ge_cntl;
unsigned num_params = MAX2(outinfo->param_exports, 1);
unsigned num_prim_params = outinfo->prim_param_exports;
radeon_set_context_reg(ctx_cs, R_0286C4_SPI_VS_OUT_CONFIG,
- S_0286C4_VS_EXPORT_COUNT(num_params - 1) |
- S_0286C4_PRIM_EXPORT_COUNT(num_prim_params) |
+ S_0286C4_VS_EXPORT_COUNT(num_params - 1) | S_0286C4_PRIM_EXPORT_COUNT(num_prim_params) |
S_0286C4_NO_PC_EXPORT(no_pc_export));
unsigned idx_format = V_028708_SPI_SHADER_1COMP;
outinfo->writes_primitive_shading_rate_per_primitive)
idx_format = V_028708_SPI_SHADER_2COMP;
- radeon_set_context_reg(ctx_cs, R_028708_SPI_SHADER_IDX_FORMAT,
- S_028708_IDX0_EXPORT_FORMAT(idx_format));
+ radeon_set_context_reg(ctx_cs, R_028708_SPI_SHADER_IDX_FORMAT, S_028708_IDX0_EXPORT_FORMAT(idx_format));
radeon_set_context_reg(
ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT,
S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
- S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP
- : V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP
- : V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP
- : V_02870C_SPI_SHADER_NONE));
+ S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) |
+ S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) |
+ S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE));
radeon_set_context_reg(
ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL,
S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) |
S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
- S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(
- misc_vec_ena || (pdevice->rad_info.gfx_level >= GFX10_3 && outinfo->pos_exports > 1)) |
+ S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena ||
+ (pdevice->rad_info.gfx_level >= GFX10_3 && outinfo->pos_exports > 1)) |
S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
- S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) | total_mask << 8 |
- clip_dist_mask);
+ S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) | total_mask << 8 | clip_dist_mask);
- radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN,
- S_028A84_PRIMITIVEID_EN(es_enable_prim_id) |
- S_028A84_NGG_DISABLE_PROVOK_REUSE(outinfo->export_prim_id));
+ radeon_set_context_reg(
+ ctx_cs, R_028A84_VGT_PRIMITIVEID_EN,
+ S_028A84_PRIMITIVEID_EN(es_enable_prim_id) | S_028A84_NGG_DISABLE_PROVOK_REUSE(outinfo->export_prim_id));
/* NGG specific registers. */
- uint32_t gs_num_invocations =
- shader->info.stage == MESA_SHADER_GEOMETRY ? shader->info.gs.invocations : 1;
+ uint32_t gs_num_invocations = shader->info.stage == MESA_SHADER_GEOMETRY ? shader->info.gs.invocations : 1;
if (pdevice->rad_info.gfx_level < GFX11) {
- radeon_set_context_reg(
- ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL,
- S_028A44_ES_VERTS_PER_SUBGRP(ngg_state->hw_max_esverts) |
- S_028A44_GS_PRIMS_PER_SUBGRP(ngg_state->max_gsprims) |
- S_028A44_GS_INST_PRIMS_IN_SUBGRP(ngg_state->max_gsprims * gs_num_invocations));
+ radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL,
+ S_028A44_ES_VERTS_PER_SUBGRP(ngg_state->hw_max_esverts) |
+ S_028A44_GS_PRIMS_PER_SUBGRP(ngg_state->max_gsprims) |
+ S_028A44_GS_INST_PRIMS_IN_SUBGRP(ngg_state->max_gsprims * gs_num_invocations));
}
radeon_set_context_reg(ctx_cs, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP,
S_0287FC_MAX_VERTS_PER_SUBGROUP(ngg_state->max_out_verts));
- radeon_set_context_reg(ctx_cs, R_028B4C_GE_NGG_SUBGRP_CNTL,
- S_028B4C_PRIM_AMP_FACTOR(ngg_state->prim_amp_factor) |
- S_028B4C_THDS_PER_SUBGRP(0)); /* for fast launch */
radeon_set_context_reg(
- ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT,
- S_028B90_CNT(gs_num_invocations) | S_028B90_ENABLE(gs_num_invocations > 1) |
- S_028B90_EN_MAX_VERT_OUT_PER_GS_INSTANCE(ngg_state->max_vert_out_per_gs_instance));
+ ctx_cs, R_028B4C_GE_NGG_SUBGRP_CNTL,
+ S_028B4C_PRIM_AMP_FACTOR(ngg_state->prim_amp_factor) | S_028B4C_THDS_PER_SUBGRP(0)); /* for fast launch */
+ radeon_set_context_reg(ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT,
+ S_028B90_CNT(gs_num_invocations) | S_028B90_ENABLE(gs_num_invocations > 1) |
+ S_028B90_EN_MAX_VERT_OUT_PER_GS_INSTANCE(ngg_state->max_vert_out_per_gs_instance));
if (pdevice->rad_info.gfx_level >= GFX11) {
ge_cntl = S_03096C_PRIMS_PER_SUBGRP(ngg_state->max_gsprims) |
S_03096C_VERTS_PER_SUBGRP(ngg_state->hw_max_esverts) |
- S_03096C_BREAK_PRIMGRP_AT_EOI(break_wave_at_eoi) |
- S_03096C_PRIM_GRP_SIZE_GFX11(252);
+ S_03096C_BREAK_PRIMGRP_AT_EOI(break_wave_at_eoi) | S_03096C_PRIM_GRP_SIZE_GFX11(252);
} else {
ge_cntl = S_03096C_PRIM_GRP_SIZE_GFX10(ngg_state->max_gsprims) |
- S_03096C_VERT_GRP_SIZE(ngg_state->hw_max_esverts) |
- S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi);
+ S_03096C_VERT_GRP_SIZE(ngg_state->hw_max_esverts) | S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi);
}
/* Bug workaround for a possible hang with non-tessellation cases.
*
* Requirement: GE_CNTL.VERT_GRP_SIZE = VGT_GS_ONCHIP_CNTL.ES_VERTS_PER_SUBGRP - 5
*/
- if (pdevice->rad_info.gfx_level == GFX10 && es_type != MESA_SHADER_TESS_EVAL &&
- ngg_state->hw_max_esverts != 256) {
+ if (pdevice->rad_info.gfx_level == GFX10 && es_type != MESA_SHADER_TESS_EVAL && ngg_state->hw_max_esverts != 256) {
ge_cntl &= C_03096C_VERT_GRP_SIZE;
if (ngg_state->hw_max_esverts > 5) {
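/* Presumably re-programs S_03096C_VERT_GRP_SIZE(ngg_state->hw_max_esverts - 5)
 * here to satisfy the requirement quoted above (body elided in this hunk). */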
ac_compute_late_alloc(&pdevice->rad_info, true, shader->info.has_ngg_culling,
shader->config.scratch_bytes_per_wave > 0, &late_alloc_wave64, &cu_mask);
- radeon_set_sh_reg_idx(pdevice, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3,
- ac_apply_cu_en(S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F),
- C_00B21C_CU_EN, 0, &pdevice->rad_info));
+ radeon_set_sh_reg_idx(
+ pdevice, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3,
+ ac_apply_cu_en(S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F), C_00B21C_CU_EN, 0, &pdevice->rad_info));
if (pdevice->rad_info.gfx_level >= GFX11) {
radeon_set_sh_reg_idx(
pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3,
- ac_apply_cu_en(
- S_00B204_CU_EN_GFX11(0x1) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64),
- C_00B204_CU_EN_GFX11, 16, &pdevice->rad_info));
+ ac_apply_cu_en(S_00B204_CU_EN_GFX11(0x1) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64),
+ C_00B204_CU_EN_GFX11, 16, &pdevice->rad_info));
} else {
radeon_set_sh_reg_idx(
pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3,
- ac_apply_cu_en(S_00B204_CU_EN_GFX10(0xffff) |
- S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64),
+ ac_apply_cu_en(S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64),
C_00B204_CU_EN_GFX10, 16, &pdevice->rad_info));
}
}
static void
-radv_emit_hw_hs(const struct radv_device *device, struct radeon_cmdbuf *cs,
- const struct radv_shader *shader)
+radv_emit_hw_hs(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_shader *shader)
{
const struct radv_physical_device *pdevice = device->physical_device;
uint64_t va = radv_shader_get_va(shader);
}
static void
-radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs, const struct radv_shader *vs)
+radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ const struct radv_shader *vs)
{
if (vs->info.vs.as_ls)
radv_emit_hw_ls(cs, vs);
}
static void
-radv_emit_tess_ctrl_shader(const struct radv_device *device, struct radeon_cmdbuf *cs,
- const struct radv_shader *tcs)
+radv_emit_tess_ctrl_shader(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_shader *tcs)
{
radv_emit_hw_hs(device, cs, tcs);
}
static void
-radv_emit_tess_eval_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs, const struct radv_shader *tes)
+radv_emit_tess_eval_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ const struct radv_shader *tes)
{
if (tes->info.is_ngg) {
radv_emit_hw_ngg(device, ctx_cs, cs, NULL, tes);
}
static void
-radv_emit_hw_gs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs, const struct radv_shader *gs)
+radv_emit_hw_gs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ const struct radv_shader *gs)
{
const struct radv_physical_device *pdevice = device->physical_device;
const struct radv_legacy_gs_info *gs_state = &gs->info.gs_ring_info;
radeon_emit(ctx_cs, (max_stream >= 3) ? num_components[3] : 0);
uint32_t gs_num_invocations = gs->info.gs.invocations;
- radeon_set_context_reg(
- ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT,
- S_028B90_CNT(MIN2(gs_num_invocations, 127)) | S_028B90_ENABLE(gs_num_invocations > 0));
+ radeon_set_context_reg(ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT,
+ S_028B90_CNT(MIN2(gs_num_invocations, 127)) | S_028B90_ENABLE(gs_num_invocations > 0));
if (pdevice->rad_info.gfx_level <= GFX8) {
/* GFX6-8: ESGS offchip ring buffer is allocated according to VGT_ESGS_RING_ITEMSIZE.
* GFX9+: Only used to set the GS input VGPRs, emulated in shaders.
*/
- radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
- gs_state->vgt_esgs_ring_itemsize);
+ radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, gs_state->vgt_esgs_ring_itemsize);
}
va = radv_shader_get_va(gs);
radeon_emit(cs, gs->config.rsrc2 | S_00B22C_LDS_SIZE(gs_state->lds_size));
radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs_state->vgt_gs_onchip_cntl);
- radeon_set_context_reg(ctx_cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
- gs_state->vgt_gs_max_prims_per_subgroup);
+ radeon_set_context_reg(ctx_cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, gs_state->vgt_gs_max_prims_per_subgroup);
} else {
radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4);
radeon_emit(cs, va >> 8);
radeon_emit(cs, gs->config.rsrc2);
}
- radeon_set_sh_reg_idx(pdevice, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3,
- ac_apply_cu_en(S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F),
- C_00B21C_CU_EN, 0, &pdevice->rad_info));
+ radeon_set_sh_reg_idx(
+ pdevice, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3,
+ ac_apply_cu_en(S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F), C_00B21C_CU_EN, 0, &pdevice->rad_info));
if (pdevice->rad_info.gfx_level >= GFX10) {
- radeon_set_sh_reg_idx(
- pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3,
- ac_apply_cu_en(S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0),
- C_00B204_CU_EN_GFX10, 16, &pdevice->rad_info));
+ radeon_set_sh_reg_idx(pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3,
+ ac_apply_cu_en(S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0),
+ C_00B204_CU_EN_GFX10, 16, &pdevice->rad_info));
}
}
static void
-radv_emit_geometry_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs, const struct radv_shader *gs,
- const struct radv_shader *es, const struct radv_shader *gs_copy_shader)
+radv_emit_geometry_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ const struct radv_shader *gs, const struct radv_shader *es,
+ const struct radv_shader *gs_copy_shader)
{
if (gs->info.is_ngg) {
radv_emit_hw_ngg(device, ctx_cs, cs, es, gs);
}
static void
-radv_emit_mesh_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs, const struct radv_shader *ms)
+radv_emit_mesh_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ const struct radv_shader *ms)
{
const struct radv_physical_device *pdevice = device->physical_device;
radv_emit_hw_ngg(device, ctx_cs, cs, NULL, ms);
radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, ms->info.workgroup_size);
- radeon_set_uconfig_reg_idx(pdevice, ctx_cs, R_030908_VGT_PRIMITIVE_TYPE, 1,
- V_008958_DI_PT_POINTLIST);
+ radeon_set_uconfig_reg_idx(pdevice, ctx_cs, R_030908_VGT_PRIMITIVE_TYPE, 1, V_008958_DI_PT_POINTLIST);
}
static uint32_t
-offset_to_ps_input(uint32_t offset, bool flat_shade, bool explicit, bool per_vertex, bool float16,
- bool per_prim_gfx11)
+offset_to_ps_input(uint32_t offset, bool flat_shade, bool explicit, bool per_vertex, bool float16, bool per_prim_gfx11)
{
uint32_t ps_input_cntl;
if (offset <= AC_EXP_PARAM_OFFSET_31) {
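/* Offsets 0..31 index a real param export slot; larger AC_EXP_PARAM_* values
 * are default-value selectors handled below (assumed from the ac enum layout). */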
}
static void
-single_slot_to_ps_input(const struct radv_vs_output_info *outinfo, unsigned slot,
- uint32_t *ps_input_cntl, unsigned *ps_offset, bool skip_undef,
- bool use_default_0, bool flat_shade, bool per_prim_gfx11)
+single_slot_to_ps_input(const struct radv_vs_output_info *outinfo, unsigned slot, uint32_t *ps_input_cntl,
+ unsigned *ps_offset, bool skip_undef, bool use_default_0, bool flat_shade, bool per_prim_gfx11)
{
unsigned vs_offset = outinfo->vs_output_param_offset[slot];
unreachable("vs_offset should not be AC_EXP_PARAM_UNDEFINED.");
}
- ps_input_cntl[*ps_offset] =
- offset_to_ps_input(vs_offset, flat_shade, false, false, false, per_prim_gfx11);
+ ps_input_cntl[*ps_offset] = offset_to_ps_input(vs_offset, flat_shade, false, false, false, per_prim_gfx11);
++(*ps_offset);
}
static void
-input_mask_to_ps_inputs(const struct radv_vs_output_info *outinfo, const struct radv_shader *ps,
- uint32_t input_mask, uint32_t *ps_input_cntl, unsigned *ps_offset,
- bool per_prim_gfx11)
+input_mask_to_ps_inputs(const struct radv_vs_output_info *outinfo, const struct radv_shader *ps, uint32_t input_mask,
+ uint32_t *ps_input_cntl, unsigned *ps_offset, bool per_prim_gfx11)
{
u_foreach_bit (i, input_mask) {
unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VAR0 + i];
unsigned ps_offset = 0;
if (ps->info.ps.prim_id_input && !mesh)
- single_slot_to_ps_input(outinfo, VARYING_SLOT_PRIMITIVE_ID, ps_input_cntl, &ps_offset, true,
- false, true, false);
+ single_slot_to_ps_input(outinfo, VARYING_SLOT_PRIMITIVE_ID, ps_input_cntl, &ps_offset, true, false, true, false);
if (ps->info.ps.layer_input && !mesh)
- single_slot_to_ps_input(outinfo, VARYING_SLOT_LAYER, ps_input_cntl, &ps_offset, false, true,
- true, false);
+ single_slot_to_ps_input(outinfo, VARYING_SLOT_LAYER, ps_input_cntl, &ps_offset, false, true, true, false);
if (ps->info.ps.viewport_index_input && !mesh)
- single_slot_to_ps_input(outinfo, VARYING_SLOT_VIEWPORT, ps_input_cntl, &ps_offset, false,
- true, true, false);
+ single_slot_to_ps_input(outinfo, VARYING_SLOT_VIEWPORT, ps_input_cntl, &ps_offset, false, true, true, false);
if (ps->info.ps.has_pcoord)
ps_input_cntl[ps_offset++] = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);
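/* PT_SPRITE_TEX makes the hardware synthesize point-sprite coordinates for this
 * slot; OFFSET(0x20) selects a default value instead of a param export (assumed). */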
if (ps->info.ps.num_input_clips_culls) {
- single_slot_to_ps_input(outinfo, VARYING_SLOT_CLIP_DIST0, ps_input_cntl, &ps_offset, true,
- false, false, false);
+ single_slot_to_ps_input(outinfo, VARYING_SLOT_CLIP_DIST0, ps_input_cntl, &ps_offset, true, false, false, false);
if (ps->info.ps.num_input_clips_culls > 4)
- single_slot_to_ps_input(outinfo, VARYING_SLOT_CLIP_DIST1, ps_input_cntl, &ps_offset, true,
- false, false, false);
+ single_slot_to_ps_input(outinfo, VARYING_SLOT_CLIP_DIST1, ps_input_cntl, &ps_offset, true, false, false,
+ false);
}
input_mask_to_ps_inputs(outinfo, ps, ps->info.ps.input_mask, ps_input_cntl, &ps_offset, false);
/* Per-primitive PS inputs: the HW needs these to be last. */
if (ps->info.ps.prim_id_input && mesh)
- single_slot_to_ps_input(outinfo, VARYING_SLOT_PRIMITIVE_ID, ps_input_cntl, &ps_offset, true,
- false, false, gfx11plus);
+ single_slot_to_ps_input(outinfo, VARYING_SLOT_PRIMITIVE_ID, ps_input_cntl, &ps_offset, true, false, false,
+ gfx11plus);
if (ps->info.ps.layer_input && mesh)
- single_slot_to_ps_input(outinfo, VARYING_SLOT_LAYER, ps_input_cntl, &ps_offset, false, true,
- false, gfx11plus);
+ single_slot_to_ps_input(outinfo, VARYING_SLOT_LAYER, ps_input_cntl, &ps_offset, false, true, false, gfx11plus);
if (ps->info.ps.viewport_index_input && mesh)
- single_slot_to_ps_input(outinfo, VARYING_SLOT_VIEWPORT, ps_input_cntl, &ps_offset, false,
- true, false, gfx11plus);
+ single_slot_to_ps_input(outinfo, VARYING_SLOT_VIEWPORT, ps_input_cntl, &ps_offset, false, true, false, gfx11plus);
- input_mask_to_ps_inputs(outinfo, ps, ps->info.ps.input_per_primitive_mask, ps_input_cntl,
- &ps_offset, gfx11plus);
+ input_mask_to_ps_inputs(outinfo, ps, ps->info.ps.input_per_primitive_mask, ps_input_cntl, &ps_offset, gfx11plus);
if (ps_offset) {
radeon_set_context_reg_seq(ctx_cs, R_028644_SPI_PS_INPUT_CNTL_0, ps_offset);
for (unsigned i = 0; i < ps_offset; i++) {
}
static void
-radv_emit_fragment_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs, const struct radv_shader *ps)
+radv_emit_fragment_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ const struct radv_shader *ps)
{
const struct radv_physical_device *pdevice = device->physical_device;
bool param_gen;
radeon_emit(ctx_cs, ps->config.spi_ps_input_addr);
/* Workaround when there are no PS inputs but LDS is used. */
- param_gen =
- pdevice->rad_info.gfx_level >= GFX11 && !ps->info.ps.num_interp && ps->config.lds_size;
+ param_gen = pdevice->rad_info.gfx_level >= GFX11 && !ps->info.ps.num_interp && ps->config.lds_size;
radeon_set_context_reg(ctx_cs, R_0286D8_SPI_PS_IN_CONTROL,
S_0286D8_NUM_INTERP(ps->info.ps.num_interp) |
S_0286D8_NUM_PRIM_INTERP(ps->info.ps.num_prim_interp) |
- S_0286D8_PS_W32_EN(ps->info.wave_size == 32) |
- S_0286D8_PARAM_GEN(param_gen));
+ S_0286D8_PS_W32_EN(ps->info.wave_size == 32) | S_0286D8_PARAM_GEN(param_gen));
- radeon_set_context_reg(
- ctx_cs, R_028710_SPI_SHADER_Z_FORMAT,
- ac_get_spi_shader_z_format(ps->info.ps.writes_z, ps->info.ps.writes_stencil,
- ps->info.ps.writes_sample_mask, ps->info.ps.writes_mrt0_alpha));
+ radeon_set_context_reg(ctx_cs, R_028710_SPI_SHADER_Z_FORMAT,
+ ac_get_spi_shader_z_format(ps->info.ps.writes_z, ps->info.ps.writes_stencil,
+ ps->info.ps.writes_sample_mask, ps->info.ps.writes_mrt0_alpha));
}
static void
TESS_SPACING_FRACTIONAL_ODD) {
vtx_reuse_depth = 14;
}
- radeon_set_context_reg(ctx_cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
- S_028C58_VTX_REUSE_DEPTH(vtx_reuse_depth));
+ radeon_set_context_reg(ctx_cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, S_028C58_VTX_REUSE_DEPTH(vtx_reuse_depth));
}
static struct radv_vgt_shader_key
-radv_pipeline_generate_vgt_shader_key(const struct radv_device *device,
- const struct radv_graphics_pipeline *pipeline)
+radv_pipeline_generate_vgt_shader_key(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline)
{
uint8_t hs_size = 64, gs_size = 64, vs_size = 64;
struct radv_vgt_shader_key key;
key.streamout = !!pipeline->streamout_shader;
if (radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) {
key.mesh = 1;
- key.mesh_scratch_ring =
- pipeline->base.shaders[MESA_SHADER_MESH]->info.ms.needs_ms_scratch_ring;
+ key.mesh_scratch_ring = pipeline->base.shaders[MESA_SHADER_MESH]->info.ms.needs_ms_scratch_ring;
}
key.hs_wave32 = hs_size == 32;
stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1);
} else if (key->mesh) {
assert(!key->ngg_passthrough);
- stages |= S_028B54_GS_EN(1) | S_028B54_GS_FAST_LAUNCH(1) |
- S_028B54_NGG_WAVE_ID_EN(key->mesh_scratch_ring);
+ stages |= S_028B54_GS_EN(1) | S_028B54_GS_FAST_LAUNCH(1) | S_028B54_NGG_WAVE_ID_EN(key->mesh_scratch_ring);
} else if (key->ngg) {
stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL);
}
if (key->ngg) {
- stages |= S_028B54_PRIMGEN_EN(1) |
- S_028B54_NGG_WAVE_ID_EN(key->streamout) |
+ stages |= S_028B54_PRIMGEN_EN(1) | S_028B54_NGG_WAVE_ID_EN(key->streamout) |
S_028B54_PRIMGEN_PASSTHRU_EN(key->ngg_passthrough) |
- S_028B54_PRIMGEN_PASSTHRU_NO_MSG(key->ngg_passthrough &&
- pdevice->rad_info.family >= CHIP_NAVI23);
+ S_028B54_PRIMGEN_PASSTHRU_NO_MSG(key->ngg_passthrough && pdevice->rad_info.family >= CHIP_NAVI23);
} else if (key->gs) {
stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
}
stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
if (pdevice->rad_info.gfx_level >= GFX10) {
- stages |= S_028B54_HS_W32_EN(key->hs_wave32) |
- S_028B54_GS_W32_EN(key->gs_wave32) |
+ stages |= S_028B54_HS_W32_EN(key->hs_wave32) | S_028B54_GS_W32_EN(key->gs_wave32) |
S_028B54_VS_W32_EN(pdevice->rad_info.gfx_level < GFX11 && key->vs_wave32);
/* Legacy GS only supports Wave64. Read it as an implication. */
assert(!(key->gs && !key->ngg) || !key->gs_wave32);
static void
radv_pipeline_emit_vgt_gs_out(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
- const struct radv_graphics_pipeline *pipeline,
- uint32_t vgt_gs_out_prim_type)
+ const struct radv_graphics_pipeline *pipeline, uint32_t vgt_gs_out_prim_type)
{
const struct radv_physical_device *pdevice = device->physical_device;
}
static void
-gfx103_pipeline_emit_vgt_draw_payload_cntl(struct radeon_cmdbuf *ctx_cs,
- const struct radv_graphics_pipeline *pipeline,
+gfx103_pipeline_emit_vgt_draw_payload_cntl(struct radeon_cmdbuf *ctx_cs, const struct radv_graphics_pipeline *pipeline,
const struct vk_graphics_pipeline_state *state)
{
const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
/* Enables the second channel of the primitive export instruction.
* This channel contains: VRS rate x, y, viewport and layer.
*/
- bool enable_prim_payload = outinfo && (outinfo->writes_viewport_index_per_primitive ||
- outinfo->writes_layer_per_primitive ||
- outinfo->writes_primitive_shading_rate_per_primitive);
+ bool enable_prim_payload =
+ outinfo && (outinfo->writes_viewport_index_per_primitive || outinfo->writes_layer_per_primitive ||
+ outinfo->writes_primitive_shading_rate_per_primitive);
- radeon_set_context_reg(
- ctx_cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL,
- S_028A98_EN_VRS_RATE(enable_vrs) | S_028A98_EN_PRIM_PAYLOAD(enable_prim_payload));
+ radeon_set_context_reg(ctx_cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL,
+ S_028A98_EN_VRS_RATE(enable_vrs) | S_028A98_EN_PRIM_PAYLOAD(enable_prim_payload));
}
static bool
-gfx103_pipeline_vrs_coarse_shading(const struct radv_device *device,
- const struct radv_graphics_pipeline *pipeline)
+gfx103_pipeline_vrs_coarse_shading(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline)
{
struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
* requested by the user. Note that vkd3d-proton always has to declare VRS as dynamic because
* in DX12 it's fully dynamic.
*/
- radeon_set_context_reg(
- ctx_cs, R_028848_PA_CL_VRS_CNTL,
- S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE) |
- S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE));
+ radeon_set_context_reg(ctx_cs, R_028848_PA_CL_VRS_CNTL,
+ S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE) |
+ S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE));
/* If the shader is using discard, turn off coarse shading because discard at 2x2 pixel
* granularity degrades quality too much. MIN allows sample shading but not coarse shading.
*/
struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
- mode = ps->info.ps.can_discard ? V_028064_SC_VRS_COMB_MODE_MIN
- : V_028064_SC_VRS_COMB_MODE_PASSTHRU;
+ mode = ps->info.ps.can_discard ? V_028064_SC_VRS_COMB_MODE_MIN : V_028064_SC_VRS_COMB_MODE_PASSTHRU;
}
if (pdevice->rad_info.gfx_level < GFX11) {
radeon_set_context_reg(ctx_cs, R_028064_DB_VRS_OVERRIDE_CNTL,
- S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) |
- S_028064_VRS_OVERRIDE_RATE_X(rate_x) |
+ S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) | S_028064_VRS_OVERRIDE_RATE_X(rate_x) |
S_028064_VRS_OVERRIDE_RATE_Y(rate_y));
}
}
cs->buf = malloc(4 * (cs->max_dw + ctx_cs->max_dw));
ctx_cs->buf = cs->buf + cs->max_dw;
- struct radv_vgt_shader_key vgt_shader_key =
- radv_pipeline_generate_vgt_shader_key(device, pipeline);
+ struct radv_vgt_shader_key vgt_shader_key = radv_pipeline_generate_vgt_shader_key(device, pipeline);
radv_pipeline_emit_blend_state(ctx_cs, pipeline, blend);
radv_pipeline_emit_vgt_gs_mode(device, ctx_cs, pipeline);
radv_emit_tess_ctrl_shader(device, cs, pipeline->base.shaders[MESA_SHADER_TESS_CTRL]);
if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
- radv_emit_tess_eval_shader(device, ctx_cs, cs,
- pipeline->base.shaders[MESA_SHADER_TESS_EVAL]);
+ radv_emit_tess_eval_shader(device, ctx_cs, cs, pipeline->base.shaders[MESA_SHADER_TESS_EVAL]);
}
- if (pdevice->rad_info.gfx_level >= GFX10 &&
- !radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY) &&
+ if (pdevice->rad_info.gfx_level >= GFX10 && !radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY) &&
!radv_pipeline_has_ngg(pipeline)) {
radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL,
- S_028A44_ES_VERTS_PER_SUBGRP(250) |
- S_028A44_GS_PRIMS_PER_SUBGRP(126) |
+ S_028A44_ES_VERTS_PER_SUBGRP(250) | S_028A44_GS_PRIMS_PER_SUBGRP(126) |
S_028A44_GS_INST_PRIMS_IN_SUBGRP(126));
}
}
}
static void
-radv_pipeline_init_vertex_input_state(const struct radv_device *device,
- struct radv_graphics_pipeline *pipeline,
+radv_pipeline_init_vertex_input_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
const struct vk_graphics_pipeline_state *state)
{
const struct radv_physical_device *pdevice = device->physical_device;
- const struct radv_shader_info *vs_info =
- &radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX)->info;
+ const struct radv_shader_info *vs_info = &radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX)->info;
if (state->vi) {
u_foreach_bit (i, state->vi->attributes_valid) {
if (vs_info->vs.has_prolog && !(pipeline->dynamic_states & RADV_DYNAMIC_VERTEX_INPUT)) {
const enum amd_gfx_level gfx_level = pdevice->rad_info.gfx_level;
const enum radeon_family family = pdevice->rad_info.family;
- const struct ac_vtx_format_info *vtx_info_table =
- ac_get_vtx_format_info_table(gfx_level, family);
+ const struct ac_vtx_format_info *vtx_info_table = ac_get_vtx_format_info_table(gfx_level, family);
pipeline->vs_input_state.bindings_match_attrib = true;
const struct ac_vtx_format_info *vtx_info = &vtx_info_table[format];
pipeline->vs_input_state.formats[i] = format;
- uint8_t align_req_minus_1 =
- vtx_info->chan_byte_size >= 4 ? 3 : (vtx_info->element_size - 1);
+ uint8_t align_req_minus_1 = vtx_info->chan_byte_size >= 4 ? 3 : (vtx_info->element_size - 1);
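/* Formats with dword-sized channels must be 4-byte aligned (mask 3); smaller
 * formats only need alignment to their total element size. */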
pipeline->vs_input_state.format_align_req_minus_1[i] = align_req_minus_1;
pipeline->vs_input_state.format_sizes[i] = vtx_info->element_size;
pipeline->vs_input_state.alpha_adjust_lo |= (vtx_info->alpha_adjust & 0x1) << i;
return NULL;
}
static void
-radv_pipeline_init_shader_stages_state(const struct radv_device *device,
- struct radv_graphics_pipeline *pipeline)
+radv_pipeline_init_shader_stages_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline)
{
for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
bool shader_exists = !!pipeline->base.shaders[i];
radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH) ? MESA_SHADER_MESH : MESA_SHADER_VERTEX;
const struct radv_shader *shader = radv_get_shader(pipeline->base.shaders, first_stage);
- const struct radv_userdata_info *loc =
- radv_get_user_sgpr(shader, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
+ const struct radv_userdata_info *loc = radv_get_user_sgpr(shader, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
if (loc->sgpr_idx != -1) {
pipeline->vtx_base_sgpr = shader->info.user_data_0;
pipeline->vtx_base_sgpr += loc->sgpr_idx * 4;
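/* SH register slots are dword-sized, hence sgpr_idx * 4 bytes past the stage's
 * user_data_0 base register (assumed register-space layout). */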
pipeline->vtx_emit_num = loc->num_sgprs;
- pipeline->uses_drawid =
- radv_get_shader(pipeline->base.shaders, first_stage)->info.vs.needs_draw_id;
- pipeline->uses_baseinstance =
- radv_get_shader(pipeline->base.shaders, first_stage)->info.vs.needs_base_instance;
+ pipeline->uses_drawid = radv_get_shader(pipeline->base.shaders, first_stage)->info.vs.needs_draw_id;
+ pipeline->uses_baseinstance = radv_get_shader(pipeline->base.shaders, first_stage)->info.vs.needs_base_instance;
assert(first_stage != MESA_SHADER_MESH || !pipeline->uses_baseinstance);
}
}
static uint32_t
-radv_pipeline_init_vgt_gs_out(struct radv_graphics_pipeline *pipeline,
- const struct vk_graphics_pipeline_state *state)
+radv_pipeline_init_vgt_gs_out(struct radv_graphics_pipeline *pipeline, const struct vk_graphics_pipeline_state *state)
{
uint32_t gs_out;
if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
- gs_out = si_conv_gl_prim_to_gs_out(
- pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.gs.output_prim);
+ gs_out = si_conv_gl_prim_to_gs_out(pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.gs.output_prim);
} else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) {
if (pipeline->base.shaders[MESA_SHADER_TESS_EVAL]->info.tes.point_mode) {
gs_out = V_028A6C_POINTLIST;
} else {
- gs_out = si_conv_tess_prim_to_gs_out(
- pipeline->base.shaders[MESA_SHADER_TESS_EVAL]->info.tes._primitive_mode);
+ gs_out = si_conv_tess_prim_to_gs_out(pipeline->base.shaders[MESA_SHADER_TESS_EVAL]->info.tes._primitive_mode);
}
} else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) {
- gs_out =
- si_conv_gl_prim_to_gs_out(pipeline->base.shaders[MESA_SHADER_MESH]->info.ms.output_prim);
+ gs_out = si_conv_gl_prim_to_gs_out(pipeline->base.shaders[MESA_SHADER_MESH]->info.ms.output_prim);
} else {
gs_out = si_conv_prim_to_gs_out(si_translate_prim(state->ia->primitive_topology), false);
}
static void
radv_pipeline_init_extra(struct radv_graphics_pipeline *pipeline,
- const struct radv_graphics_pipeline_create_info *extra,
- struct radv_blend_state *blend_state,
- const struct vk_graphics_pipeline_state *state,
- uint32_t *vgt_gs_out_prim_type)
+ const struct radv_graphics_pipeline_create_info *extra, struct radv_blend_state *blend_state,
+ const struct vk_graphics_pipeline_state *state, uint32_t *vgt_gs_out_prim_type)
{
if (extra->custom_blend_mode == V_028808_CB_ELIMINATE_FAST_CLEAR ||
extra->custom_blend_mode == V_028808_CB_FMASK_DECOMPRESS ||
pipeline->db_render_control |= S_028000_STENCIL_CLEAR_ENABLE(extra->db_stencil_clear);
pipeline->db_render_control |= S_028000_RESUMMARIZE_ENABLE(extra->resummarize_enable);
pipeline->db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(extra->depth_compress_disable);
- pipeline->db_render_control |=
- S_028000_STENCIL_COMPRESS_DISABLE(extra->stencil_compress_disable);
+ pipeline->db_render_control |= S_028000_STENCIL_COMPRESS_DISABLE(extra->stencil_compress_disable);
}
}
static VkResult
radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv_device *device,
- struct vk_pipeline_cache *cache,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo,
const struct radv_graphics_pipeline_create_info *extra)
{
VkGraphicsPipelineLibraryFlagBitsEXT needed_lib_flags = ALL_GRAPHICS_LIB_FLAGS;
/* If we have libraries, import them first. */
if (libs_info) {
- const bool link_optimize =
- (pCreateInfo->flags & VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT) != 0;
+ const bool link_optimize = (pCreateInfo->flags & VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT) != 0;
for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
RADV_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
- struct radv_graphics_lib_pipeline *gfx_pipeline_lib =
- radv_pipeline_to_graphics_lib(pipeline_lib);
+ struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);
assert(pipeline_lib->type == RADV_PIPELINE_GRAPHICS_LIB);
*/
assert(!link_optimize || gfx_pipeline_lib->base.retain_shaders);
- radv_graphics_pipeline_import_lib(device, pipeline, &state, &pipeline_layout,
- gfx_pipeline_lib, link_optimize);
+ radv_graphics_pipeline_import_lib(device, pipeline, &state, &pipeline_layout, gfx_pipeline_lib, link_optimize);
needed_lib_flags &= ~gfx_pipeline_lib->lib_flags;
}
}
/* Import graphics pipeline info that was not included in the libraries. */
- result = radv_pipeline_import_graphics_info(device, pipeline, &state, &pipeline_layout,
- pCreateInfo, needed_lib_flags);
+ result =
+ radv_pipeline_import_graphics_info(device, pipeline, &state, &pipeline_layout, pCreateInfo, needed_lib_flags);
if (result != VK_SUCCESS) {
radv_pipeline_layout_finish(device, &pipeline_layout);
return result;
if (!fast_linking_enabled)
radv_pipeline_layout_hash(&pipeline_layout);
- if (!radv_skip_graphics_pipeline_compile(device, pipeline, needed_lib_flags,
- fast_linking_enabled)) {
- struct radv_pipeline_key key = radv_generate_graphics_pipeline_key(
- device, pipeline, pCreateInfo, &state, needed_lib_flags);
+ if (!radv_skip_graphics_pipeline_compile(device, pipeline, needed_lib_flags, fast_linking_enabled)) {
+ struct radv_pipeline_key key =
+ radv_generate_graphics_pipeline_key(device, pipeline, pCreateInfo, &state, needed_lib_flags);
- result = radv_graphics_pipeline_compile(pipeline, pCreateInfo, &pipeline_layout, device,
- cache, &key, needed_lib_flags, fast_linking_enabled);
+ result = radv_graphics_pipeline_compile(pipeline, pCreateInfo, &pipeline_layout, device, cache, &key,
+ needed_lib_flags, fast_linking_enabled);
if (result != VK_SUCCESS) {
radv_pipeline_layout_finish(device, &pipeline_layout);
return result;
uint32_t vgt_gs_out_prim_type = radv_pipeline_init_vgt_gs_out(pipeline, &state);
- radv_pipeline_init_multisample_state(device, pipeline, pCreateInfo, &state,
- vgt_gs_out_prim_type);
+ radv_pipeline_init_multisample_state(device, pipeline, pCreateInfo, &state, vgt_gs_out_prim_type);
if (!radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH))
radv_pipeline_init_input_assembly_state(device, pipeline);
}
unsigned custom_blend_mode = extra ? extra->custom_blend_mode : 0;
- if (radv_needs_null_export_workaround(device, ps, custom_blend_mode) &&
- !blend.spi_shader_col_format) {
+ if (radv_needs_null_export_workaround(device, ps, custom_blend_mode) && !blend.spi_shader_col_format) {
blend.spi_shader_col_format = V_028714_SPI_SHADER_32_R;
pipeline->col_format_non_compacted = V_028714_SPI_SHADER_32_R;
}
pipeline->is_ngg = radv_pipeline_has_ngg(pipeline);
pipeline->has_ngg_culling =
- pipeline->is_ngg &&
- pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.has_ngg_culling;
- pipeline->force_vrs_per_vertex =
- pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.force_vrs_per_vertex;
+ pipeline->is_ngg && pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.has_ngg_culling;
+ pipeline->force_vrs_per_vertex = pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.force_vrs_per_vertex;
pipeline->rast_prim = vgt_gs_out_prim_type;
- pipeline->uses_out_of_order_rast =
- state.rs->rasterization_order_amd == VK_RASTERIZATION_ORDER_RELAXED_AMD;
+ pipeline->uses_out_of_order_rast = state.rs->rasterization_order_amd == VK_RASTERIZATION_ORDER_RELAXED_AMD;
pipeline->uses_vrs_attachment = radv_pipeline_uses_vrs_attachment(pCreateInfo, &state);
pipeline->base.push_constant_size = pipeline_layout.push_constant_size;
}
VkResult
-radv_graphics_pipeline_create(VkDevice _device, VkPipelineCache _cache,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
+radv_graphics_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkGraphicsPipelineCreateInfo *pCreateInfo,
const struct radv_graphics_pipeline_create_info *extra,
const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
{
struct radv_graphics_pipeline *pipeline;
VkResult result;
- pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pipeline == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
*pPipeline = radv_pipeline_to_handle(&pipeline->base);
- radv_rmv_log_graphics_pipeline_create(device, pCreateInfo->flags, &pipeline->base,
- pipeline->base.is_internal);
+ radv_rmv_log_graphics_pipeline_create(device, pCreateInfo->flags, &pipeline->base, pipeline->base.is_internal);
return VK_SUCCESS;
}
}
static VkResult
-radv_graphics_lib_pipeline_init(struct radv_graphics_lib_pipeline *pipeline,
- struct radv_device *device, struct vk_pipeline_cache *cache,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+radv_graphics_lib_pipeline_init(struct radv_graphics_lib_pipeline *pipeline, struct radv_device *device,
+ struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
VkResult result;
/* If we have libraries, import them first. */
if (libs_info) {
- const bool link_optimize =
- (pCreateInfo->flags & VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT) != 0;
+ const bool link_optimize = (pCreateInfo->flags & VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT) != 0;
for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
RADV_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
- struct radv_graphics_lib_pipeline *gfx_pipeline_lib =
- radv_pipeline_to_graphics_lib(pipeline_lib);
+ struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);
- radv_graphics_pipeline_import_lib(device, &pipeline->base, state, pipeline_layout,
- gfx_pipeline_lib, link_optimize);
+ radv_graphics_pipeline_import_lib(device, &pipeline->base, state, pipeline_layout, gfx_pipeline_lib,
+ link_optimize);
pipeline->lib_flags |= gfx_pipeline_lib->lib_flags;
}
}
- result = radv_pipeline_import_graphics_info(device, &pipeline->base, state, pipeline_layout,
- pCreateInfo, needed_lib_flags);
+ result = radv_pipeline_import_graphics_info(device, &pipeline->base, state, pipeline_layout, pCreateInfo,
+ needed_lib_flags);
if (result != VK_SUCCESS)
return result;
if (!fast_linking_enabled)
radv_pipeline_layout_hash(pipeline_layout);
- struct radv_pipeline_key key = radv_generate_graphics_pipeline_key(
- device, &pipeline->base, pCreateInfo, state, needed_lib_flags);
+ struct radv_pipeline_key key =
+ radv_generate_graphics_pipeline_key(device, &pipeline->base, pCreateInfo, state, needed_lib_flags);
- return radv_graphics_pipeline_compile(&pipeline->base, pCreateInfo, pipeline_layout, device,
- cache, &key, needed_lib_flags, fast_linking_enabled);
+ return radv_graphics_pipeline_compile(&pipeline->base, pCreateInfo, pipeline_layout, device, cache, &key,
+ needed_lib_flags, fast_linking_enabled);
}
static VkResult
struct radv_graphics_lib_pipeline *pipeline;
VkResult result;
- pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pipeline == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
void
-radv_destroy_graphics_lib_pipeline(struct radv_device *device,
- struct radv_graphics_lib_pipeline *pipeline)
+radv_destroy_graphics_lib_pipeline(struct radv_device *device, struct radv_graphics_lib_pipeline *pipeline)
{
radv_pipeline_layout_finish(device, &pipeline->layout);
VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
- const VkGraphicsPipelineCreateInfo *pCreateInfos,
- const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
+ const VkGraphicsPipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
+ VkPipeline *pPipelines)
{
VkResult result = VK_SUCCESS;
unsigned i = 0;
for (; i < count; i++) {
VkResult r;
if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) {
- r = radv_graphics_lib_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator,
- &pPipelines[i]);
+ r = radv_graphics_lib_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, &pPipelines[i]);
} else {
- r = radv_graphics_pipeline_create(_device, pipelineCache, &pCreateInfos[i], NULL,
- pAllocator, &pPipelines[i]);
+ r = radv_graphics_pipeline_create(_device, pipelineCache, &pCreateInfos[i], NULL, pAllocator, &pPipelines[i]);
}
if (r != VK_SUCCESS) {
result = r;
};
static uint32_t
-handle_from_stages(struct radv_device *device, struct radv_ray_tracing_stage *stages,
- unsigned stage_count, bool replay_namespace)
+handle_from_stages(struct radv_device *device, struct radv_ray_tracing_stage *stages, unsigned stage_count,
+ bool replay_namespace)
{
struct mesa_sha1 ctx;
_mesa_sha1_init(&ctx);
}
static VkResult
-radv_create_group_handles(struct radv_device *device,
- const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
- struct radv_ray_tracing_stage *stages,
- struct radv_ray_tracing_group *groups)
+radv_create_group_handles(struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
+ struct radv_ray_tracing_stage *stages, struct radv_ray_tracing_group *groups)
{
- bool capture_replay = pCreateInfo->flags &
- VK_PIPELINE_CREATE_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR;
+ bool capture_replay = pCreateInfo->flags & VK_PIPELINE_CREATE_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR;
for (unsigned i = 0; i < pCreateInfo->groupCount; ++i) {
const VkRayTracingShaderGroupCreateInfoKHR *group_info = &pCreateInfo->pGroups[i];
switch (group_info->type) {
if (group_info->anyHitShader != VK_SHADER_UNUSED_KHR)
temp_stages[cnt++] = stages[group_info->anyHitShader];
- groups[i].handle.intersection_index =
- handle_from_stages(device, temp_stages, cnt, capture_replay);
+ groups[i].handle.intersection_index = handle_from_stages(device, temp_stages, cnt, capture_replay);
}
break;
case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
if (capture_replay) {
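/* Capture/replay requires reproducible handles: if the app passes back a
 * captured handle that no longer matches what we derive, replay cannot work. */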
if (group_info->pShaderGroupCaptureReplayHandle &&
- memcmp(group_info->pShaderGroupCaptureReplayHandle, &groups[i].handle,
- sizeof(groups[i].handle)) != 0) {
+ memcmp(group_info->pShaderGroupCaptureReplayHandle, &groups[i].handle, sizeof(groups[i].handle)) != 0) {
return VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS;
}
}
}
static VkResult
-radv_rt_fill_group_info(struct radv_device *device,
- const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
- struct radv_ray_tracing_stage *stages,
- struct radv_ray_tracing_group *groups)
+radv_rt_fill_group_info(struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
+ struct radv_ray_tracing_stage *stages, struct radv_ray_tracing_group *groups)
{
VkResult result = radv_create_group_handles(device, pCreateInfo, stages, groups);
unsigned stage_count = pCreateInfo->stageCount;
for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) {
RADV_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]);
- struct radv_ray_tracing_pipeline *library_pipeline =
- radv_pipeline_to_ray_tracing(pipeline);
+ struct radv_ray_tracing_pipeline *library_pipeline = radv_pipeline_to_ray_tracing(pipeline);
for (unsigned j = 0; j < library_pipeline->group_count; ++j) {
struct radv_ray_tracing_group *dst = &groups[idx + j];
}
static void
-radv_rt_fill_stage_info(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
- struct radv_ray_tracing_stage *stages)
+radv_rt_fill_stage_info(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, struct radv_ray_tracing_stage *stages)
{
uint32_t idx;
for (idx = 0; idx < pCreateInfo->stageCount; idx++) {
if (pCreateInfo->pLibraryInfo) {
for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) {
RADV_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]);
- struct radv_ray_tracing_pipeline *library_pipeline =
- radv_pipeline_to_ray_tracing(pipeline);
+ struct radv_ray_tracing_pipeline *library_pipeline = radv_pipeline_to_ray_tracing(pipeline);
for (unsigned j = 0; j < library_pipeline->stage_count; ++j) {
stages[idx].shader = vk_pipeline_cache_object_ref(library_pipeline->stages[j].shader);
stages[idx].stage = library_pipeline->stages[j].stage;
if (pCreateInfo->pLibraryInfo) {
for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) {
RADV_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]);
- struct radv_ray_tracing_pipeline *library_pipeline =
- radv_pipeline_to_ray_tracing(pipeline);
+ struct radv_ray_tracing_pipeline *library_pipeline = radv_pipeline_to_ray_tracing(pipeline);
total_stages += library_pipeline->stage_count;
total_groups += library_pipeline->group_count;
}
}
- nir_metadata_preserve(nir_shader_get_entrypoint(shader),
- nir_metadata_all & (~nir_metadata_instr_index));
+ nir_metadata_preserve(nir_shader_get_entrypoint(shader), nir_metadata_all & (~nir_metadata_instr_index));
}
static struct radv_shader *
radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
- const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
- const struct radv_pipeline_key *pipeline_key, struct radv_pipeline_stage *stage,
- uint32_t *stack_size)
+ const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, const struct radv_pipeline_key *pipeline_key,
+ struct radv_pipeline_stage *stage, uint32_t *stack_size)
{
struct radv_shader_binary *binary;
RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
for (uint32_t i = 0; i < num_shaders; i++) {
struct radv_pipeline_stage temp_stage = *stage;
temp_stage.nir = shaders[i];
- radv_nir_lower_rt_abi(temp_stage.nir, pCreateInfo, &temp_stage.args, &stage->info, stack_size,
- i > 0);
+ radv_nir_lower_rt_abi(temp_stage.nir, pCreateInfo, &temp_stage.args, &stage->info, stack_size, i > 0);
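/* The trailing i > 0 flag presumably marks resume shaders: every part after the
 * first entry point produced by shader splitting (assumed parameter meaning). */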
radv_optimize_nir(temp_stage.nir, pipeline_key->optimisations_disabled);
radv_postprocess_nir(device, pipeline_layout, pipeline_key, MESA_SHADER_NONE, &temp_stage);
/* Compile NIR shader to AMD assembly. */
struct radv_shader *shader;
- shader = radv_shader_nir_to_asm(device, cache, stage, shaders, num_shaders, pipeline_key,
- keep_executable_info, keep_statistic_info, &binary);
+ shader = radv_shader_nir_to_asm(device, cache, stage, shaders, num_shaders, pipeline_key, keep_executable_info,
+ keep_statistic_info, &binary);
if (shader && keep_executable_info && stage->spirv.size) {
shader->spirv = malloc(stage->spirv.size);
radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *cache,
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
const VkPipelineCreationFeedbackCreateInfo *creation_feedback,
- const struct radv_pipeline_key *key,
- struct radv_ray_tracing_pipeline *pipeline)
+ const struct radv_pipeline_key *key, struct radv_ray_tracing_pipeline *pipeline)
{
if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
return VK_PIPELINE_COMPILE_REQUIRED;
if (radv_ray_tracing_stage_is_compiled(&stages[idx])) {
uint32_t stack_size = 0;
- struct radv_shader *shader =
- radv_rt_nir_to_asm(device, cache, pCreateInfo, key, &stage, &stack_size);
+ struct radv_shader *shader = radv_rt_nir_to_asm(device, cache, pCreateInfo, key, &stage, &stack_size);
stages[idx].stack_size = stack_size;
stages[idx].shader = shader ? &shader->base : NULL;
} else {
uint8_t shader_sha1[SHA1_DIGEST_LENGTH];
radv_hash_shaders(shader_sha1, &stage, 1, NULL, key, radv_get_hash_flags(device, false));
stages[idx].stack_size = stage.nir->scratch_size;
- stages[idx].shader = radv_pipeline_cache_nir_to_handle(
- device, cache, stage.nir, shader_sha1, !key->optimisations_disabled);
+ stages[idx].shader =
+ radv_pipeline_cache_nir_to_handle(device, cache, stage.nir, shader_sha1, !key->optimisations_disabled);
}
ralloc_free(stage.nir);
return false;
for (unsigned i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; ++i) {
- if (pCreateInfo->pDynamicState->pDynamicStates[i] ==
- VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
+ if (pCreateInfo->pDynamicState->pDynamicStates[i] == VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
return true;
}
}
static void
-compute_rt_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
- struct radv_ray_tracing_pipeline *pipeline)
+compute_rt_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, struct radv_ray_tracing_pipeline *pipeline)
{
if (radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo)) {
pipeline->stack_size = -1u;
}
pipeline->stack_size =
raygen_size +
- MIN2(pCreateInfo->maxPipelineRayRecursionDepth, 1) *
- MAX2(chit_miss_size, intersection_size + any_hit_size) +
- MAX2(0, (int)(pCreateInfo->maxPipelineRayRecursionDepth) - 1) * chit_miss_size +
- 2 * callable_size;
+ MIN2(pCreateInfo->maxPipelineRayRecursionDepth, 1) * MAX2(chit_miss_size, intersection_size + any_hit_size) +
+ MAX2(0, (int)(pCreateInfo->maxPipelineRayRecursionDepth) - 1) * chit_miss_size + 2 * callable_size;
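/* Worked example (sizes assumed): depth 2, raygen 64, chit/miss 32, isec 16,
 * any-hit 16, callable 8 -> 64 + 1 * MAX2(32, 32) + 1 * 32 + 2 * 8 = 144 bytes. */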
}
static void
config->spilled_sgprs = MAX2(config->spilled_sgprs, other->spilled_sgprs);
config->spilled_vgprs = MAX2(config->spilled_vgprs, other->spilled_vgprs);
config->lds_size = MAX2(config->lds_size, other->lds_size);
- config->scratch_bytes_per_wave =
- MAX2(config->scratch_bytes_per_wave, other->scratch_bytes_per_wave);
+ config->scratch_bytes_per_wave = MAX2(config->scratch_bytes_per_wave, other->scratch_bytes_per_wave);
assert(config->float_mode == other->float_mode);
}
static void
-postprocess_rt_config(struct ac_shader_config *config, enum amd_gfx_level gfx_level,
- unsigned wave_size)
+postprocess_rt_config(struct ac_shader_config *config, enum amd_gfx_level gfx_level, unsigned wave_size)
{
- config->rsrc1 = (config->rsrc1 & C_00B848_VGPRS) |
- S_00B848_VGPRS((config->num_vgprs - 1) / (wave_size == 32 ? 8 : 4));
+ config->rsrc1 =
+ (config->rsrc1 & C_00B848_VGPRS) | S_00B848_VGPRS((config->num_vgprs - 1) / (wave_size == 32 ? 8 : 4));
if (gfx_level < GFX10)
- config->rsrc1 =
- (config->rsrc1 & C_00B848_SGPRS) | S_00B848_SGPRS((config->num_sgprs - 1) / 8);
+ config->rsrc1 = (config->rsrc1 & C_00B848_SGPRS) | S_00B848_SGPRS((config->num_sgprs - 1) / 8);
config->rsrc2 = (config->rsrc2 & C_00B84C_LDS_SIZE) | S_00B84C_LDS_SIZE(config->lds_size);
- config->rsrc3 = (config->rsrc3 & C_00B8A0_SHARED_VGPR_CNT) |
- S_00B8A0_SHARED_VGPR_CNT(config->num_shared_vgprs / 8);
+ config->rsrc3 = (config->rsrc3 & C_00B8A0_SHARED_VGPR_CNT) | S_00B8A0_SHARED_VGPR_CNT(config->num_shared_vgprs / 8);
}
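/* Illustrative numbers for the packing above (not from this change): with
 * wave_size == 32 and num_vgprs = 36, the VGPRS field of RSRC1 is encoded as
 * (36 - 1) / 8 = 4; on pre-GFX10 parts, num_sgprs = 24 encodes into SGPRS as
 * (24 - 1) / 8 = 2. The divisor mirrors the allocation granularity implied by
 * the code: units of 8 VGPRs for wave32, 4 for wave64. */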
static void
struct ac_shader_config *config = &pipeline->base.base.shaders[MESA_SHADER_COMPUTE]->config;
for (unsigned i = 0; i < pipeline->stage_count; i++) {
if (radv_ray_tracing_stage_is_compiled(&pipeline->stages[i])) {
- struct radv_shader *shader =
- container_of(pipeline->stages[i].shader, struct radv_shader, base);
+ struct radv_shader *shader = container_of(pipeline->stages[i].shader, struct radv_shader, base);
combine_config(config, &shader->config);
}
}
combine_config(config, &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]->config);
- postprocess_rt_config(config, device->physical_device->rad_info.gfx_level,
- device->physical_device->rt_wave_size);
+ postprocess_rt_config(config, device->physical_device->rad_info.gfx_level, device->physical_device->rt_wave_size);
}
static VkResult
-radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
- const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
+radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
{
RADV_FROM_HANDLE(radv_device, device, _device);
int64_t pipeline_start = os_time_get_nano();
- VkRayTracingPipelineCreateInfoKHR local_create_info =
- radv_create_merged_rt_create_info(pCreateInfo);
+ VkRayTracingPipelineCreateInfoKHR local_create_info = radv_create_merged_rt_create_info(pCreateInfo);
VK_MULTIALLOC(ma);
VK_MULTIALLOC_DECL(&ma, struct radv_ray_tracing_pipeline, pipeline, 1);
VK_MULTIALLOC_DECL(&ma, struct radv_ray_tracing_stage, stages, local_create_info.stageCount);
VK_MULTIALLOC_DECL(&ma, struct radv_ray_tracing_group, groups, local_create_info.groupCount);
- if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
+ if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
return VK_ERROR_OUT_OF_HOST_MEMORY;
radv_pipeline_init(device, &pipeline->base.base, RADV_PIPELINE_RAY_TRACING);
if (result != VK_SUCCESS)
goto fail;
- struct radv_pipeline_key key =
- radv_generate_pipeline_key(device, &pipeline->base.base, pCreateInfo->flags);
+ struct radv_pipeline_key key = radv_generate_pipeline_key(device, &pipeline->base.base, pCreateInfo->flags);
radv_hash_rt_shaders(pipeline->sha1, pCreateInfo, &key, pipeline->groups,
radv_get_hash_flags(device, keep_statistic_info));
cache_hit = radv_ray_tracing_pipeline_cache_search(device, cache, pipeline, pCreateInfo);
if (!cache_hit) {
- result =
- radv_rt_compile_shaders(device, cache, pCreateInfo, creation_feedback, &key, pipeline);
+ result = radv_rt_compile_shaders(device, cache, pCreateInfo, creation_feedback, &key, pipeline);
if (result != VK_SUCCESS)
goto fail;
}
if (!cache_hit)
- radv_ray_tracing_pipeline_cache_insert(device, cache, pipeline, pCreateInfo->stageCount,
- pipeline->sha1);
+ radv_ray_tracing_pipeline_cache_insert(device, cache, pipeline, pCreateInfo->stageCount, pipeline->sha1);
/* write shader VAs into group handles */
for (unsigned i = 0; i < pipeline->group_count; i++) {
if (pipeline->groups[i].recursive_shader != VK_SHADER_UNUSED_KHR) {
struct radv_shader *shader =
- container_of(pipeline->stages[pipeline->groups[i].recursive_shader].shader,
- struct radv_shader, base);
- pipeline->groups[i].handle.recursive_shader_ptr =
- shader->va | radv_get_rt_priority(shader->info.stage);
+ container_of(pipeline->stages[pipeline->groups[i].recursive_shader].shader, struct radv_shader, base);
+ pipeline->groups[i].handle.recursive_shader_ptr = shader->va | radv_get_rt_priority(shader->info.stage);
}
}
}
void
-radv_destroy_ray_tracing_pipeline(struct radv_device *device,
- struct radv_ray_tracing_pipeline *pipeline)
+radv_destroy_ray_tracing_pipeline(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
{
for (unsigned i = 0; i < pipeline->stage_count; i++) {
if (pipeline->stages[i].shader)
unsigned i = 0;
for (; i < count; i++) {
VkResult r;
- r = radv_rt_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator,
- &pPipelines[i]);
+ r = radv_rt_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, &pPipelines[i]);
if (r != VK_SUCCESS) {
result = r;
pPipelines[i] = VK_NULL_HANDLE;
}
VKAPI_ATTR VkResult VKAPI_CALL
-radv_GetRayTracingShaderGroupHandlesKHR(VkDevice device, VkPipeline _pipeline, uint32_t firstGroup,
- uint32_t groupCount, size_t dataSize, void *pData)
+radv_GetRayTracingShaderGroupHandlesKHR(VkDevice device, VkPipeline _pipeline, uint32_t firstGroup, uint32_t groupCount,
+ size_t dataSize, void *pData)
{
RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
struct radv_ray_tracing_group *groups = radv_pipeline_to_ray_tracing(pipeline)->groups;
memset(data, 0, groupCount * RADV_RT_HANDLE_SIZE);
for (uint32_t i = 0; i < groupCount; ++i) {
- memcpy(data + i * RADV_RT_HANDLE_SIZE, &groups[firstGroup + i].handle,
- sizeof(struct radv_pipeline_group_handle));
+ memcpy(data + i * RADV_RT_HANDLE_SIZE, &groups[firstGroup + i].handle, sizeof(struct radv_pipeline_group_handle));
}
return VK_SUCCESS;
}
VKAPI_ATTR VkResult VKAPI_CALL
-radv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(VkDevice device, VkPipeline pipeline,
- uint32_t firstGroup, uint32_t groupCount,
- size_t dataSize, void *pData)
+radv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(VkDevice device, VkPipeline pipeline, uint32_t firstGroup,
+ uint32_t groupCount, size_t dataSize, void *pData)
{
- return radv_GetRayTracingShaderGroupHandlesKHR(device, pipeline, firstGroup, groupCount,
- dataSize, pData);
+ return radv_GetRayTracingShaderGroupHandlesKHR(device, pipeline, firstGroup, groupCount, dataSize, pData);
}
#include "vk_debug_report.h"
#include "vk_device.h"
#include "vk_format.h"
+#include "vk_image.h"
#include "vk_instance.h"
#include "vk_log.h"
#include "vk_physical_device.h"
-#include "vk_shader_module.h"
#include "vk_queue.h"
+#include "vk_shader_module.h"
#include "vk_util.h"
-#include "vk_image.h"
-#include "vk_ycbcr_conversion.h"
#include "vk_video.h"
+#include "vk_ycbcr_conversion.h"
#include "rmv/vk_rmv_common.h"
#include "rmv/vk_rmv_tokens.h"
#include "wsi_common.h"
#ifdef __cplusplus
-extern "C"
-{
+extern "C" {
#endif
/* Helper to determine if we should compile
#define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 0
#endif
-#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || defined(VK_USE_PLATFORM_XCB_KHR) || \
- defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_DISPLAY_KHR)
+#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || defined(VK_USE_PLATFORM_XCB_KHR) || defined(VK_USE_PLATFORM_XLIB_KHR) || \
+ defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define RADV_USE_WSI_PLATFORM
#endif
/* A non-fatal assert. Useful for debugging. */
#ifdef NDEBUG
-#define radv_assert(x) \
- do { \
+#define radv_assert(x) \
+ do { \
} while (0)
#else
-#define radv_assert(x) \
- do { \
- if (unlikely(!(x))) \
- fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
+#define radv_assert(x) \
+ do { \
+ if (unlikely(!(x))) \
+ fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
} while (0)
#endif
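/* Example use (illustrative expression): radv_assert(cmd_buffer->cs != NULL);
 * a failing check in a debug build prints "<file>:<line> ASSERT:
 * cmd_buffer->cs != NULL" to stderr and keeps running, while NDEBUG builds
 * compile the statement away entirely. */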
bool *found_in_application_cache);
void radv_pipeline_cache_insert(struct radv_device *device, struct vk_pipeline_cache *cache,
- struct radv_pipeline *pipeline,
- struct radv_shader_part_binary *ps_epilog_binary,
+ struct radv_pipeline *pipeline, struct radv_shader_part_binary *ps_epilog_binary,
const unsigned char *sha1);
struct radv_ray_tracing_pipeline;
-bool radv_ray_tracing_pipeline_cache_search(struct radv_device *device,
- struct vk_pipeline_cache *cache,
+bool radv_ray_tracing_pipeline_cache_search(struct radv_device *device, struct vk_pipeline_cache *cache,
struct radv_ray_tracing_pipeline *pipeline,
const VkRayTracingPipelineCreateInfoKHR *create_info);
-void radv_ray_tracing_pipeline_cache_insert(struct radv_device *device,
- struct vk_pipeline_cache *cache,
- struct radv_ray_tracing_pipeline *pipeline,
- unsigned num_stages, const unsigned char *sha1);
-
-struct vk_pipeline_cache_object *radv_pipeline_cache_search_nir(struct radv_device *device,
- struct vk_pipeline_cache *cache,
- const unsigned char *sha1);
+void radv_ray_tracing_pipeline_cache_insert(struct radv_device *device, struct vk_pipeline_cache *cache,
+ struct radv_ray_tracing_pipeline *pipeline, unsigned num_stages,
+ const unsigned char *sha1);
struct vk_pipeline_cache_object *
-radv_pipeline_cache_nir_to_handle(struct radv_device *device, struct vk_pipeline_cache *cache,
- struct nir_shader *nir, const unsigned char *sha1, bool cached);
+radv_pipeline_cache_search_nir(struct radv_device *device, struct vk_pipeline_cache *cache, const unsigned char *sha1);
+
+struct vk_pipeline_cache_object *radv_pipeline_cache_nir_to_handle(struct radv_device *device,
+ struct vk_pipeline_cache *cache,
+ struct nir_shader *nir, const unsigned char *sha1,
+ bool cached);
struct nir_shader *radv_pipeline_cache_handle_to_nir(struct radv_device *device,
struct vk_pipeline_cache_object *object);
static inline enum radv_blit_ds_layout
radv_meta_blit_ds_to_type(VkImageLayout layout)
{
- return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_BLIT_DS_LAYOUT_TILE_DISABLE
- : RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
+ return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_BLIT_DS_LAYOUT_TILE_DISABLE : RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
}
static inline VkImageLayout
radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout)
{
- return ds_layout == RADV_BLIT_DS_LAYOUT_TILE_ENABLE ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
- : VK_IMAGE_LAYOUT_GENERAL;
+ return ds_layout == RADV_BLIT_DS_LAYOUT_TILE_ENABLE ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;
}
enum radv_meta_dst_layout {
static inline enum radv_meta_dst_layout
radv_meta_dst_layout_from_layout(VkImageLayout layout)
{
- return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_META_DST_LAYOUT_GENERAL
- : RADV_META_DST_LAYOUT_OPTIMAL;
+ return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_META_DST_LAYOUT_GENERAL : RADV_META_DST_LAYOUT_OPTIMAL;
}
static inline VkImageLayout
radv_meta_dst_layout_to_layout(enum radv_meta_dst_layout layout)
{
- return layout == RADV_META_DST_LAYOUT_OPTIMAL ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
- : VK_IMAGE_LAYOUT_GENERAL;
+ return layout == RADV_META_DST_LAYOUT_OPTIMAL ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;
}
struct radv_meta_state {
static inline enum radv_queue_family
vk_queue_to_radv(const struct radv_physical_device *phys_dev, int queue_family_index)
{
- if (queue_family_index == VK_QUEUE_FAMILY_EXTERNAL ||
- queue_family_index == VK_QUEUE_FAMILY_FOREIGN_EXT)
+ if (queue_family_index == VK_QUEUE_FAMILY_EXTERNAL || queue_family_index == VK_QUEUE_FAMILY_FOREIGN_EXT)
return RADV_QUEUE_FOREIGN;
if (queue_family_index == VK_QUEUE_FAMILY_IGNORED)
return RADV_QUEUE_IGNORED;
}
enum amd_ip_type radv_queue_family_to_ring(const struct radv_physical_device *physical_device,
- enum radv_queue_family f);
+ enum radv_queue_family f);
static inline bool
radv_has_uvd(struct radv_physical_device *phys_dev)
void radv_queue_finish(struct radv_queue *queue);
-enum radeon_ctx_priority
-radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoKHR *pObj);
+enum radeon_ctx_priority radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoKHR *pObj);
struct radv_shader_dma_submission {
struct list_head list;
#endif
};
-void radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device,
- struct radeon_winsys_bo *bo);
+void radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device, struct radeon_winsys_bo *bo);
void radv_device_memory_finish(struct radv_device_memory *mem);
struct radv_descriptor_range {
VkDeviceSize offset;
};
-void radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device,
- struct radeon_winsys_bo *bo, uint64_t size, uint64_t offset);
+void radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device, struct radeon_winsys_bo *bo,
+ uint64_t size, uint64_t offset);
void radv_buffer_finish(struct radv_buffer *buffer);
enum radv_dynamic_state_bits {
RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 15,
RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 16,
- RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER =
- (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META),
+ RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META),
- RADV_CMD_FLUSH_ALL_COMPUTE =
- (RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE |
- RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_WB_L2 | RADV_CMD_FLAG_CS_PARTIAL_FLUSH),
+ RADV_CMD_FLUSH_ALL_COMPUTE = (RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE |
+ RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_WB_L2 | RADV_CMD_FLAG_CS_PARTIAL_FLUSH),
};
enum radv_nggc_settings {
* The follower writes the value, and the leader waits.
*/
struct {
- uint64_t va; /* Virtual address of the semaphore. */
- uint32_t leader_value; /* Current value of the leader. */
- uint32_t emitted_leader_value; /* Emitted value emitted by the leader. */
+ uint64_t va; /* Virtual address of the semaphore. */
+ uint32_t leader_value; /* Current value of the leader. */
+ uint32_t emitted_leader_value; /* Value last emitted by the leader. */
} sem;
} gang;
uint64_t va;
};
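/* A minimal sketch of how gang.sem is presumably used (inferred from the
 * fields above, not part of this change): the leader advances leader_value
 * when it needs to synchronize, the follower's command stream writes that
 * value to sem.va when its work completes, and the leader waits on the same
 * address until the value appears; emitted_leader_value tracks what has
 * already been emitted so a wait is not re-issued needlessly. */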
-void radv_compute_dispatch(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_dispatch_info *info);
+void radv_compute_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info);
struct radv_image;
struct radv_image_view;
void cik_create_gfx_config(struct radv_device *device);
-void si_write_scissors(struct radeon_cmdbuf *cs, int count, const VkRect2D *scissors,
- const VkViewport *viewports);
+void si_write_scissors(struct radeon_cmdbuf *cs, int count, const VkRect2D *scissors, const VkViewport *viewports);
-void si_write_guardband(struct radeon_cmdbuf *cs, int count, const VkViewport *viewports,
- unsigned rast_prim, unsigned polygon_mode, float line_width);
+void si_write_guardband(struct radeon_cmdbuf *cs, int count, const VkViewport *viewports, unsigned rast_prim,
+ unsigned polygon_mode, float line_width);
-VkResult radv_create_shadow_regs_preamble(const struct radv_device *device,
- struct radv_queue_state *queue_state);
-void radv_destroy_shadow_regs_preamble(struct radv_queue_state *queue_state,
- struct radeon_winsys *ws);
+VkResult radv_create_shadow_regs_preamble(const struct radv_device *device, struct radv_queue_state *queue_state);
+void radv_destroy_shadow_regs_preamble(struct radv_queue_state *queue_state, struct radeon_winsys *ws);
void radv_emit_shadow_regs_preamble(struct radeon_cmdbuf *cs, const struct radv_device *device,
struct radv_queue_state *queue_state);
-VkResult radv_init_shadowed_regs_buffer_state(const struct radv_device *device,
- struct radv_queue *queue);
-
-uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw,
- bool indirect_draw, bool count_from_stream_output,
- uint32_t draw_vertex_count, unsigned topology,
- bool prim_restart_enable, unsigned patch_control_points,
- unsigned num_tess_patches);
-void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, bool is_mec,
- unsigned event, unsigned event_flags, unsigned dst_sel,
- unsigned data_sel, uint64_t va, uint32_t new_fence,
- uint64_t gfx9_eop_bug_va);
+VkResult radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct radv_queue *queue);
+
+uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw, bool indirect_draw,
+ bool count_from_stream_output, uint32_t draw_vertex_count, unsigned topology,
+ bool prim_restart_enable, unsigned patch_control_points, unsigned num_tess_patches);
+void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, bool is_mec, unsigned event,
+ unsigned event_flags, unsigned dst_sel, unsigned data_sel, uint64_t va,
+ uint32_t new_fence, uint64_t gfx9_eop_bug_va);
struct radv_vgt_shader_key {
uint8_t tess : 1;
uint8_t vs_wave32 : 1;
};
-void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref,
- uint32_t mask);
-void si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs,
- enum amd_gfx_level gfx_level, uint32_t *flush_cnt, uint64_t flush_va,
- bool is_mec, enum radv_cmd_flush_bits flush_bits,
+void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask);
+void si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
+ uint32_t *flush_cnt, uint64_t flush_va, bool is_mec, enum radv_cmd_flush_bits flush_bits,
enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va);
void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
-void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible,
- unsigned pred_op, uint64_t va);
-void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va,
- uint64_t size);
-void si_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va,
- unsigned size, bool predicating);
+void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible, unsigned pred_op,
+ uint64_t va);
+void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va, uint64_t size);
+void si_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, unsigned size,
+ bool predicating);
void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size);
-void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size,
- unsigned value);
+void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, unsigned value);
void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer);
uint32_t radv_get_pa_su_sc_mode_cntl(const struct radv_cmd_buffer *cmd_buffer);
uint32_t radv_hash_ps_epilog(const void *key_);
bool radv_cmp_ps_epilog(const void *a_, const void *b_);
-struct radv_ps_epilog_state
-{
+struct radv_ps_epilog_state {
uint8_t color_attachment_count;
VkFormat color_attachment_formats[MAX_RTS];
const struct radv_ps_epilog_state *state,
bool disable_mrt_compaction);
-bool radv_needs_null_export_workaround(const struct radv_device *device,
- const struct radv_shader *ps, unsigned custom_blend_mode);
+bool radv_needs_null_export_workaround(const struct radv_device *device, const struct radv_shader *ps,
+ unsigned custom_blend_mode);
void radv_cmd_buffer_reset_rendering(struct radv_cmd_buffer *cmd_buffer);
-bool radv_cmd_buffer_upload_alloc_aligned(struct radv_cmd_buffer *cmd_buffer, unsigned size,
- unsigned alignment,
+bool radv_cmd_buffer_upload_alloc_aligned(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned alignment,
unsigned *out_offset, void **ptr);
-bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size,
- unsigned *out_offset, void **ptr);
-bool radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size,
- const void *data, unsigned *out_offset);
+bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned *out_offset, void **ptr);
+bool radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, const void *data,
+ unsigned *out_offset);
void radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer,
- const struct radv_graphics_pipeline *pipeline,
- bool full_null_descriptors, void *vb_ptr);
+ const struct radv_graphics_pipeline *pipeline, bool full_null_descriptors,
+ void *vb_ptr);
void radv_write_scissors(struct radv_cmd_buffer *cmd_buffer, struct radeon_cmdbuf *cs);
-void radv_cmd_buffer_clear_attachment(struct radv_cmd_buffer *cmd_buffer,
- const VkClearAttachment *attachment);
-void radv_cmd_buffer_clear_rendering(struct radv_cmd_buffer *cmd_buffer,
- const VkRenderingInfo *render_info);
+void radv_cmd_buffer_clear_attachment(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *attachment);
+void radv_cmd_buffer_clear_rendering(struct radv_cmd_buffer *cmd_buffer, const VkRenderingInfo *render_info);
void radv_cmd_buffer_resolve_rendering(struct radv_cmd_buffer *cmd_buffer);
-void radv_cmd_buffer_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *src_iview,
- VkImageLayout src_layout,
- struct radv_image_view *dst_iview,
- VkImageLayout dst_layout,
- const VkImageResolve2 *region);
-void radv_depth_stencil_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer,
- VkImageAspectFlags aspects,
+void radv_cmd_buffer_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
+ VkImageLayout src_layout, struct radv_image_view *dst_iview,
+ VkImageLayout dst_layout, const VkImageResolve2 *region);
+void radv_depth_stencil_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlags aspects,
VkResolveModeFlagBits resolve_mode);
-void radv_cmd_buffer_resolve_rendering_fs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *src_iview,
- VkImageLayout src_layout,
- struct radv_image_view *dst_iview,
- VkImageLayout dst_layout);
-void radv_depth_stencil_resolve_rendering_fs(struct radv_cmd_buffer *cmd_buffer,
- VkImageAspectFlags aspects,
+void radv_cmd_buffer_resolve_rendering_fs(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
+ VkImageLayout src_layout, struct radv_image_view *dst_iview,
+ VkImageLayout dst_layout);
+void radv_depth_stencil_resolve_rendering_fs(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlags aspects,
VkResolveModeFlagBits resolve_mode);
void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples);
unsigned radv_get_default_max_sample_dist(int log_samples);
void radv_device_init_msaa(struct radv_device *device);
VkResult radv_device_init_vrs_state(struct radv_device *device);
-void radv_emit_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va,
- uint32_t imm);
+void radv_emit_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t imm);
-void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- VkClearDepthStencilValue ds_clear_value,
- VkImageAspectFlags aspects);
+void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
+ VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects);
-void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview, int cb_idx,
- uint32_t color_values[2]);
+void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
+ int cb_idx, uint32_t color_values[2]);
-bool radv_image_use_dcc_image_stores(const struct radv_device *device,
- const struct radv_image *image);
-bool radv_image_use_dcc_predication(const struct radv_device *device,
- const struct radv_image *image);
+bool radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image);
+bool radv_image_use_dcc_predication(const struct radv_device *device, const struct radv_image *image);
void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *range, bool value);
void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *range, bool value);
-enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
- VkAccessFlags2 src_flags,
+enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 src_flags,
const struct radv_image *image);
-enum radv_cmd_flush_bits radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
- VkAccessFlags2 dst_flags,
+enum radv_cmd_flush_bits radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 dst_flags,
const struct radv_image *image);
uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
struct radeon_winsys_bo *bo, uint64_t va, uint64_t size, uint32_t value);
void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *src_bo,
- struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset,
- uint64_t size);
+ struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset, uint64_t size);
void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
bool radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD);
}
static inline void
-radv_emit_shader_pointer(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t sh_offset,
- uint64_t va, bool global)
+radv_emit_shader_pointer(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t sh_offset, uint64_t va,
+ bool global)
{
bool use_32bit_pointers = !global;
}
static inline const struct radv_push_constant_state *
-radv_get_push_constants_state(const struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint bind_point)
+radv_get_push_constants_state(const struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
{
return &cmd_buffer->push_constant_state[vk_to_bind_point(bind_point)];
}
-void
-radv_get_viewport_xform(const VkViewport *viewport, float scale[3], float translate[3]);
+void radv_get_viewport_xform(const VkViewport *viewport, float scale[3], float translate[3]);
/*
* Takes x,y,z as exact numbers of invocations, instead of blocks.
* Limitations: Can't call normal dispatch functions without binding or rebinding
* the compute pipeline.
*/
-void radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y,
- uint32_t z);
+void radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z);
-void radv_indirect_dispatch(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo,
- uint64_t va);
+void radv_indirect_dispatch(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo, uint64_t va);
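/* Illustrative call for the helper documented above: dispatch exactly 100
 * invocations along X rather than a whole number of workgroups:
 *
 *    radv_unaligned_dispatch(cmd_buffer, 100, 1, 1);
 *
 * Per the limitation noted above, the regular dispatch entry points cannot be
 * used afterwards without rebinding the compute pipeline. */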
struct radv_event {
struct vk_object_base base;
uint64_t *map;
};
-#define RADV_HASH_SHADER_CS_WAVE32 (1 << 1)
-#define RADV_HASH_SHADER_PS_WAVE32 (1 << 2)
-#define RADV_HASH_SHADER_GE_WAVE32 (1 << 3)
-#define RADV_HASH_SHADER_LLVM (1 << 4)
-#define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8)
-#define RADV_HASH_SHADER_USE_NGG_CULLING (1 << 13)
-#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS (1 << 14)
+#define RADV_HASH_SHADER_CS_WAVE32 (1 << 1)
+#define RADV_HASH_SHADER_PS_WAVE32 (1 << 2)
+#define RADV_HASH_SHADER_GE_WAVE32 (1 << 3)
+#define RADV_HASH_SHADER_LLVM (1 << 4)
+#define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8)
+#define RADV_HASH_SHADER_USE_NGG_CULLING (1 << 13)
+#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS (1 << 14)
#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS2 (1 << 15)
#define RADV_HASH_SHADER_EMULATE_RT (1 << 16)
#define RADV_HASH_SHADER_SPLIT_FMA (1 << 17)
struct radv_pipeline_key;
struct radv_ray_tracing_group;
-void radv_pipeline_stage_init(const VkPipelineShaderStageCreateInfo *sinfo,
- struct radv_pipeline_stage *out_stage, gl_shader_stage stage);
+void radv_pipeline_stage_init(const VkPipelineShaderStageCreateInfo *sinfo, struct radv_pipeline_stage *out_stage,
+ gl_shader_stage stage);
-void radv_hash_shaders(unsigned char *hash, const struct radv_pipeline_stage *stages,
- uint32_t stage_count, const struct radv_pipeline_layout *layout,
- const struct radv_pipeline_key *key, uint32_t flags);
+void radv_hash_shaders(unsigned char *hash, const struct radv_pipeline_stage *stages, uint32_t stage_count,
+ const struct radv_pipeline_layout *layout, const struct radv_pipeline_key *key, uint32_t flags);
-void radv_hash_rt_stages(struct mesa_sha1 *ctx, const VkPipelineShaderStageCreateInfo *stages,
- unsigned stage_count);
+void radv_hash_rt_stages(struct mesa_sha1 *ctx, const VkPipelineShaderStageCreateInfo *stages, unsigned stage_count);
void radv_hash_rt_shaders(unsigned char *hash, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
- const struct radv_pipeline_key *key,
- const struct radv_ray_tracing_group *groups, uint32_t flags);
+ const struct radv_pipeline_key *key, const struct radv_ray_tracing_group *groups,
+ uint32_t flags);
uint32_t radv_get_hash_flags(const struct radv_device *device, bool stats);
bool radv_emulate_rt(const struct radv_physical_device *pdevice);
enum {
- RADV_RT_STAGE_BITS = (VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
- VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR |
- VK_SHADER_STAGE_INTERSECTION_BIT_KHR | VK_SHADER_STAGE_CALLABLE_BIT_KHR)
+ RADV_RT_STAGE_BITS =
+ (VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
+ VK_SHADER_STAGE_MISS_BIT_KHR | VK_SHADER_STAGE_INTERSECTION_BIT_KHR | VK_SHADER_STAGE_CALLABLE_BIT_KHR)
};
#define RADV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)
-#define radv_foreach_stage(stage, stage_bits) \
- for (gl_shader_stage stage, __tmp = (gl_shader_stage)((stage_bits)&RADV_STAGE_MASK); \
- stage = ffs(__tmp) - 1, __tmp; __tmp &= ~(1 << (stage)))
+#define radv_foreach_stage(stage, stage_bits) \
+ for (gl_shader_stage stage, __tmp = (gl_shader_stage)((stage_bits)&RADV_STAGE_MASK); stage = ffs(__tmp) - 1, __tmp; \
+ __tmp &= ~(1 << (stage)))
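/* Example (hypothetical mask): the ffs()-based loop above visits set bits
 * from lowest to highest and clears each one as it goes, so
 *
 *    radv_foreach_stage(s, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT) {
 *       mark_stage_dirty(s);   // mark_stage_dirty is a placeholder
 *    }
 *
 * handles MESA_SHADER_VERTEX and then MESA_SHADER_FRAGMENT, relying on the VK
 * stage bit positions matching the gl_shader_stage enum values. */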
extern const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS];
unsigned radv_format_meta_fs_key(struct radv_device *device, VkFormat format);
};
enum radv_depth_clamp_mode {
- RADV_DEPTH_CLAMP_MODE_VIEWPORT = 0, /* Clamp to the viewport min/max depth bounds */
- RADV_DEPTH_CLAMP_MODE_ZERO_TO_ONE = 1, /* Clamp between 0.0f and 1.0f */
- RADV_DEPTH_CLAMP_MODE_DISABLED = 2, /* Disable depth clamping */
+ RADV_DEPTH_CLAMP_MODE_VIEWPORT = 0, /* Clamp to the viewport min/max depth bounds */
+ RADV_DEPTH_CLAMP_MODE_ZERO_TO_ONE = 1, /* Clamp between 0.0f and 1.0f */
+ RADV_DEPTH_CLAMP_MODE_DISABLED = 2, /* Disable depth clamping */
};
struct radv_pipeline {
VkPipelineShaderStageCreateInfo *stages;
};
-#define RADV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum) \
- static inline struct radv_##pipe_type##_pipeline * \
- radv_pipeline_to_##pipe_type(struct radv_pipeline *pipeline) \
- { \
- assert(pipeline->type == pipe_enum); \
- return (struct radv_##pipe_type##_pipeline *) pipeline; \
+#define RADV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum) \
+ static inline struct radv_##pipe_type##_pipeline *radv_pipeline_to_##pipe_type(struct radv_pipeline *pipeline) \
+ { \
+ assert(pipeline->type == pipe_enum); \
+ return (struct radv_##pipe_type##_pipeline *)pipeline; \
}
RADV_DECL_PIPELINE_DOWNCAST(graphics, RADV_PIPELINE_GRAPHICS)
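/* The macro above declares a checked downcast helper; the graphics
 * instantiation lets callers write (illustrative):
 *
 *    struct radv_graphics_pipeline *gfx = radv_pipeline_to_graphics(pipeline);
 *
 * which asserts pipeline->type == RADV_PIPELINE_GRAPHICS in debug builds
 * before casting to the derived type. */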
void radv_pipeline_emit_hw_cs(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs,
const struct radv_shader *shader);
-void radv_pipeline_emit_compute_state(const struct radv_physical_device *pdevice,
- struct radeon_cmdbuf *cs, const struct radv_shader *shader);
+void radv_pipeline_emit_compute_state(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs,
+ const struct radv_shader *shader);
-bool radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size,
- unsigned num_components, nir_intrinsic_instr *low, nir_intrinsic_instr *high,
- void *data);
+bool radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size, unsigned num_components,
+ nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data);
-void radv_compute_pipeline_init(const struct radv_device *device,
- struct radv_compute_pipeline *pipeline,
+void radv_compute_pipeline_init(const struct radv_device *device, struct radv_compute_pipeline *pipeline,
const struct radv_pipeline_layout *layout);
struct radv_graphics_pipeline_create_info {
};
struct radv_pipeline_key radv_generate_pipeline_key(const struct radv_device *device,
- const struct radv_pipeline *pipeline,
- VkPipelineCreateFlags flags);
+ const struct radv_pipeline *pipeline, VkPipelineCreateFlags flags);
-void radv_pipeline_init(struct radv_device *device, struct radv_pipeline *pipeline,
- enum radv_pipeline_type type);
+void radv_pipeline_init(struct radv_device *device, struct radv_pipeline *pipeline, enum radv_pipeline_type type);
VkResult radv_graphics_pipeline_create(VkDevice device, VkPipelineCache cache,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
VkResult radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
const VkComputePipelineCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
- VkPipeline *pPipeline);
+ const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline);
bool radv_pipeline_capture_shaders(const struct radv_device *device, VkPipelineCreateFlags flags);
-bool radv_pipeline_capture_shader_stats(const struct radv_device *device,
- VkPipelineCreateFlags flags);
+bool radv_pipeline_capture_shader_stats(const struct radv_device *device, VkPipelineCreateFlags flags);
-VkPipelineShaderStageCreateInfo *
-radv_copy_shader_stage_create_info(struct radv_device *device, uint32_t stageCount,
- const VkPipelineShaderStageCreateInfo *pStages, void *mem_ctx);
+VkPipelineShaderStageCreateInfo *radv_copy_shader_stage_create_info(struct radv_device *device, uint32_t stageCount,
+ const VkPipelineShaderStageCreateInfo *pStages,
+ void *mem_ctx);
bool radv_shader_need_indirect_descriptor_sets(const struct radv_shader *shader);
const VkAllocationCallbacks *allocator);
struct vk_format_description;
-uint32_t radv_translate_buffer_dataformat(const struct util_format_description *desc,
- int first_non_void);
-uint32_t radv_translate_buffer_numformat(const struct util_format_description *desc,
- int first_non_void);
+uint32_t radv_translate_buffer_dataformat(const struct util_format_description *desc, int first_non_void);
+uint32_t radv_translate_buffer_numformat(const struct util_format_description *desc, int first_non_void);
bool radv_is_buffer_format_supported(VkFormat format, bool *scaled);
uint32_t radv_colorformat_endian_swap(uint32_t colorformat);
unsigned radv_translate_colorswap(VkFormat format, bool do_endian_swap);
uint32_t radv_translate_dbformat(VkFormat format);
-uint32_t radv_translate_tex_dataformat(VkFormat format, const struct util_format_description *desc,
- int first_non_void);
-uint32_t radv_translate_tex_numformat(VkFormat format, const struct util_format_description *desc,
- int first_non_void);
-bool radv_format_pack_clear_color(VkFormat format, uint32_t clear_vals[2],
- VkClearColorValue *value);
-bool radv_is_storage_image_format_supported(const struct radv_physical_device *physical_device,
- VkFormat format);
-bool radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice,
- VkFormat format, bool *blendable);
+uint32_t radv_translate_tex_dataformat(VkFormat format, const struct util_format_description *desc, int first_non_void);
+uint32_t radv_translate_tex_numformat(VkFormat format, const struct util_format_description *desc, int first_non_void);
+bool radv_format_pack_clear_color(VkFormat format, uint32_t clear_vals[2], VkClearColorValue *value);
+bool radv_is_storage_image_format_supported(const struct radv_physical_device *physical_device, VkFormat format);
+bool radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice, VkFormat format, bool *blendable);
bool radv_dcc_formats_compatible(enum amd_gfx_level gfx_level, VkFormat format1, VkFormat format2,
bool *sign_reinterpret);
bool radv_is_atomic_format_supported(VkFormat format);
bool radv_device_supports_etc(const struct radv_physical_device *physical_device);
static const VkImageUsageFlags RADV_IMAGE_USAGE_WRITE_BITS =
- VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
- VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
+ VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
+ VK_IMAGE_USAGE_STORAGE_BIT;
struct radv_image_plane {
VkFormat format;
struct radv_image_plane planes[0];
};
-struct ac_surf_info radv_get_ac_surf_info(struct radv_device *device,
- const struct radv_image *image);
+struct ac_surf_info radv_get_ac_surf_info(struct radv_device *device, const struct radv_image *image);
/* Whether the image has a htile that is known consistent with the contents of
* the image and is allowed to be in compressed form.
* If this is false, reads that don't use the htile should be able to return
* correct results.
*/
-bool radv_layout_is_htile_compressed(const struct radv_device *device,
- const struct radv_image *image, VkImageLayout layout,
- unsigned queue_mask);
+bool radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image,
+ VkImageLayout layout, unsigned queue_mask);
-bool radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image,
- unsigned level, VkImageLayout layout, unsigned queue_mask);
+bool radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image, unsigned level,
+ VkImageLayout layout, unsigned queue_mask);
-bool radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image,
- unsigned level, VkImageLayout layout, unsigned queue_mask);
+bool radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image, unsigned level,
+ VkImageLayout layout, unsigned queue_mask);
enum radv_fmask_compression {
RADV_FMASK_COMPRESSION_NONE,
RADV_FMASK_COMPRESSION_FULL,
};
-enum radv_fmask_compression radv_layout_fmask_compression(
- const struct radv_device *device, const struct radv_image *image, VkImageLayout layout,
- unsigned queue_mask);
+enum radv_fmask_compression radv_layout_fmask_compression(const struct radv_device *device,
+ const struct radv_image *image, VkImageLayout layout,
+ unsigned queue_mask);
/**
* Return whether the image has CMASK metadata for color surfaces.
static inline bool
radv_image_has_dcc(const struct radv_image *image)
{
- return !(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) &&
- image->planes[0].surface.meta_offset;
+ return !(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) && image->planes[0].surface.meta_offset;
}
/**
static inline bool
radv_image_has_htile(const struct radv_image *image)
{
- return image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER &&
- image->planes[0].surface.meta_size;
+ return image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER && image->planes[0].surface.meta_size;
}
/**
static inline bool
radv_image_is_tc_compat_htile(const struct radv_image *image)
{
- return radv_image_has_htile(image) &&
- (image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE);
+ return radv_image_has_htile(image) && (image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE);
}
/**
{
/* ITERATE_256 is required for depth or stencil MSAA images that use TC-compatible HTILE. */
return device->physical_device->rad_info.gfx_level >= GFX10 &&
- (image->vk.usage &
- (VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&
+ (image->vk.usage & (VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&
radv_image_is_tc_compat_htile(image) && image->vk.samples > 1;
}
-unsigned radv_image_queue_family_mask(const struct radv_image *image,
- enum radv_queue_family family,
+unsigned radv_image_queue_family_mask(const struct radv_image *image, enum radv_queue_family family,
enum radv_queue_family queue_family);
bool radv_image_is_renderable(const struct radv_device *device, const struct radv_image *image);
struct radeon_bo_metadata;
-void radv_init_metadata(struct radv_device *device, struct radv_image *image,
- struct radeon_bo_metadata *metadata);
+void radv_init_metadata(struct radv_device *device, struct radv_image *image, struct radeon_bo_metadata *metadata);
-void radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image,
- uint64_t offset, uint32_t stride);
+void radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image, uint64_t offset,
+ uint32_t stride);
union radv_descriptor {
struct {
const struct radeon_bo_metadata *bo_metadata;
};
-VkResult
-radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
- const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
- const struct VkVideoProfileListInfoKHR *profile_list,
- struct radv_image *image);
+VkResult radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
+ const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
+ const struct VkVideoProfileListInfoKHR *profile_list, struct radv_image *image);
VkResult radv_image_create(VkDevice _device, const struct radv_image_create_info *info,
const VkAllocationCallbacks *alloc, VkImage *pImage, bool is_internal);
-bool radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext,
- VkFormat format, VkImageCreateFlags flags,
- bool *sign_reinterpret);
+bool radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext, VkFormat format,
+ VkImageCreateFlags flags, bool *sign_reinterpret);
bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format);
VkResult radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info,
- const VkNativeBufferANDROID *gralloc_info,
- const VkAllocationCallbacks *alloc, VkImage *out_image_h);
-VkResult radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
- unsigned priority,
+ const VkNativeBufferANDROID *gralloc_info, const VkAllocationCallbacks *alloc,
+ VkImage *out_image_h);
+VkResult radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem, unsigned priority,
const VkImportAndroidHardwareBufferInfoANDROID *info);
-VkResult radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
- unsigned priority, const VkMemoryAllocateInfo *pAllocateInfo);
+VkResult radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem, unsigned priority,
+ const VkMemoryAllocateInfo *pAllocateInfo);
unsigned radv_ahb_format_for_vk_format(VkFormat vk_format);
};
void radv_image_view_init(struct radv_image_view *view, struct radv_device *device,
- const VkImageViewCreateInfo *pCreateInfo,
- VkImageCreateFlags img_create_flags,
+ const VkImageViewCreateInfo *pCreateInfo, VkImageCreateFlags img_create_flags,
const struct radv_image_view_extra_create_info *extra_create_info);
void radv_image_view_finish(struct radv_image_view *iview);
VkAccessFlags2 dst_access_mask;
};
-void radv_emit_resolve_barrier(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_resolve_barrier *barrier);
+void radv_emit_resolve_barrier(struct radv_cmd_buffer *cmd_buffer, const struct radv_resolve_barrier *barrier);
VkResult radv_device_init_meta(struct radv_device *device);
void radv_device_finish_meta(struct radv_device *device);
};
void radv_pc_deinit_query_pool(struct radv_pc_query_pool *pool);
-VkResult radv_pc_init_query_pool(struct radv_physical_device *pdevice,
- const VkQueryPoolCreateInfo *pCreateInfo,
+VkResult radv_pc_init_query_pool(struct radv_physical_device *pdevice, const VkQueryPoolCreateInfo *pCreateInfo,
struct radv_pc_query_pool *pool);
-void radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool,
- uint64_t va);
-void radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool,
- uint64_t va);
+void radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va);
+void radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va);
void radv_pc_get_results(const struct radv_pc_query_pool *pc_pool, const uint64_t *data, void *out);
-#define VL_MACROBLOCK_WIDTH 16
+#define VL_MACROBLOCK_WIDTH 16
#define VL_MACROBLOCK_HEIGHT 16
struct radv_vid_mem {
struct radv_device_memory *mem;
- VkDeviceSize offset;
- VkDeviceSize size;
+ VkDeviceSize offset;
+ VkDeviceSize size;
};
struct radv_video_session {
uint32_t stream_handle;
unsigned stream_type;
bool interlaced;
- enum {
- DPB_MAX_RES = 0,
- DPB_DYNAMIC_TIER_1,
- DPB_DYNAMIC_TIER_2
- } dpb_type;
+ enum { DPB_MAX_RES = 0, DPB_DYNAMIC_TIER_1, DPB_DYNAMIC_TIER_2 } dpb_type;
unsigned db_alignment;
struct radv_vid_mem sessionctx;
};
/* needed for ac_gpu_info codecs */
-#define RADV_VIDEO_FORMAT_UNKNOWN 0
-#define RADV_VIDEO_FORMAT_MPEG12 1 /**< MPEG1, MPEG2 */
-#define RADV_VIDEO_FORMAT_MPEG4 2 /**< DIVX, XVID */
-#define RADV_VIDEO_FORMAT_VC1 3 /**< WMV */
-#define RADV_VIDEO_FORMAT_MPEG4_AVC 4/**< H.264 */
-#define RADV_VIDEO_FORMAT_HEVC 5 /**< H.265 */
-#define RADV_VIDEO_FORMAT_JPEG 6 /**< JPEG */
-#define RADV_VIDEO_FORMAT_VP9 7 /**< VP9 */
-#define RADV_VIDEO_FORMAT_AV1 8 /**< AV1 */
+#define RADV_VIDEO_FORMAT_UNKNOWN 0
+#define RADV_VIDEO_FORMAT_MPEG12 1 /**< MPEG1, MPEG2 */
+#define RADV_VIDEO_FORMAT_MPEG4 2 /**< DIVX, XVID */
+#define RADV_VIDEO_FORMAT_VC1 3 /**< WMV */
+#define RADV_VIDEO_FORMAT_MPEG4_AVC 4 /**< H.264 */
+#define RADV_VIDEO_FORMAT_HEVC 5 /**< H.265 */
+#define RADV_VIDEO_FORMAT_JPEG 6 /**< JPEG */
+#define RADV_VIDEO_FORMAT_VP9 7 /**< VP9 */
+#define RADV_VIDEO_FORMAT_AV1 8 /**< AV1 */
bool radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs);
void radv_cmd_update_descriptor_sets(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
VkDescriptorSet overrideSet, uint32_t descriptorWriteCount,
- const VkWriteDescriptorSet *pDescriptorWrites,
- uint32_t descriptorCopyCount,
+ const VkWriteDescriptorSet *pDescriptorWrites, uint32_t descriptorCopyCount,
const VkCopyDescriptorSet *pDescriptorCopies);
-void radv_cmd_update_descriptor_set_with_template(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
+void radv_cmd_update_descriptor_set_with_template(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
struct radv_descriptor_set *set,
VkDescriptorUpdateTemplate descriptorUpdateTemplate,
const void *pData);
-void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout,
- uint32_t set, uint32_t descriptorWriteCount,
+void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint pipelineBindPoint,
+ VkPipelineLayout _layout, uint32_t set, uint32_t descriptorWriteCount,
const VkWriteDescriptorSet *pDescriptorWrites);
-void radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va,
- VkFormat vk_format, unsigned offset, unsigned range,
- uint32_t *state);
+void radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFormat vk_format, unsigned offset,
+ unsigned range, uint32_t *state);
uint32_t radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *range, uint32_t value);
struct radv_nir_compiler_options;
struct radv_shader_info;
-void llvm_compile_shader(const struct radv_nir_compiler_options *options,
- const struct radv_shader_info *info, unsigned shader_count,
- struct nir_shader *const *shaders, struct radv_shader_binary **binary,
+void llvm_compile_shader(const struct radv_nir_compiler_options *options, const struct radv_shader_info *info,
+ unsigned shader_count, struct nir_shader *const *shaders, struct radv_shader_binary **binary,
const struct radv_shader_args *args);
/* radv_shader_info.h */
struct radv_shader_info;
-void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
- gl_shader_stage next_stage,
- const struct radv_pipeline_layout *layout,
- const struct radv_pipeline_key *pipeline_key,
- const enum radv_pipeline_type pipeline_type,
- bool consider_force_vrs,
+void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir, gl_shader_stage next_stage,
+ const struct radv_pipeline_layout *layout, const struct radv_pipeline_key *pipeline_key,
+ const enum radv_pipeline_type pipeline_type, bool consider_force_vrs,
struct radv_shader_info *info);
void radv_nir_shader_info_init(struct radv_shader_info *info);
-void radv_nir_shader_info_link(struct radv_device *device,
- const struct radv_pipeline_key *pipeline_key,
+void radv_nir_shader_info_link(struct radv_device *device, const struct radv_pipeline_key *pipeline_key,
struct radv_pipeline_stage *stages);
bool radv_sqtt_init(struct radv_device *device);
bool radv_end_sqtt(struct radv_queue *queue);
bool radv_get_sqtt_trace(struct radv_queue *queue, struct ac_sqtt_trace *sqtt_trace);
void radv_reset_sqtt_trace(struct radv_device *device);
-void radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data,
- uint32_t num_dwords);
+void radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data, uint32_t num_dwords);
bool radv_is_instruction_timing_enabled(void);
bool radv_sqtt_sample_clocks(struct radv_device *device);
-void radv_emit_inhibit_clockgating(const struct radv_device *device, struct radeon_cmdbuf *cs,
- bool inhibit);
+void radv_emit_inhibit_clockgating(const struct radv_device *device, struct radeon_cmdbuf *cs, bool inhibit);
void radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf *cs, bool enable);
int radv_rra_trace_frame(void);
VkResult radv_rra_dump_trace(VkQueue vk_queue, char *filename);
void radv_rra_trace_finish(VkDevice vk_device, struct radv_rra_trace_data *data);
-bool radv_sdma_copy_image(struct radv_device *device, struct radeon_cmdbuf *cs,
- struct radv_image *image, struct radv_buffer *buffer,
- const VkBufferImageCopy2 *region);
-void radv_sdma_copy_buffer(struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t src_va,
- uint64_t dst_va, uint64_t size);
+bool radv_sdma_copy_image(struct radv_device *device, struct radeon_cmdbuf *cs, struct radv_image *image,
+ struct radv_buffer *buffer, const VkBufferImageCopy2 *region);
+void radv_sdma_copy_buffer(struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t src_va, uint64_t dst_va,
+ uint64_t size);
void radv_memory_trace_init(struct radv_device *device);
-void radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo,
- uint32_t size, bool is_internal);
+void radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo, uint32_t size, bool is_internal);
void radv_rmv_log_bo_destroy(struct radv_device *device, struct radeon_winsys_bo *bo);
void radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool is_internal,
VkMemoryAllocateFlags alloc_flags);
void radv_rmv_log_buffer_bind(struct radv_device *device, VkBuffer _buffer);
-void radv_rmv_log_image_create(struct radv_device *device, const VkImageCreateInfo *create_info,
- bool is_internal, VkImage _image);
+void radv_rmv_log_image_create(struct radv_device *device, const VkImageCreateInfo *create_info, bool is_internal,
+ VkImage _image);
void radv_rmv_log_image_bind(struct radv_device *device, VkImage _image);
void radv_rmv_log_query_pool_create(struct radv_device *device, VkQueryPool pool, bool is_internal);
void radv_rmv_log_command_buffer_bo_create(struct radv_device *device, struct radeon_winsys_bo *bo,
- uint32_t executable_size, uint32_t data_size,
- uint32_t scratch_size);
-void radv_rmv_log_command_buffer_bo_destroy(struct radv_device *device,
- struct radeon_winsys_bo *bo);
-void radv_rmv_log_border_color_palette_create(struct radv_device *device,
- struct radeon_winsys_bo *bo);
-void radv_rmv_log_border_color_palette_destroy(struct radv_device *device,
- struct radeon_winsys_bo *bo);
-void radv_rmv_log_sparse_add_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo,
- uint64_t offset);
-void radv_rmv_log_sparse_remove_residency(struct radv_device *device,
- struct radeon_winsys_bo *src_bo, uint64_t offset);
-void radv_rmv_log_descriptor_pool_create(struct radv_device *device,
- const VkDescriptorPoolCreateInfo *create_info,
+ uint32_t executable_size, uint32_t data_size, uint32_t scratch_size);
+void radv_rmv_log_command_buffer_bo_destroy(struct radv_device *device, struct radeon_winsys_bo *bo);
+void radv_rmv_log_border_color_palette_create(struct radv_device *device, struct radeon_winsys_bo *bo);
+void radv_rmv_log_border_color_palette_destroy(struct radv_device *device, struct radeon_winsys_bo *bo);
+void radv_rmv_log_sparse_add_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, uint64_t offset);
+void radv_rmv_log_sparse_remove_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, uint64_t offset);
+void radv_rmv_log_descriptor_pool_create(struct radv_device *device, const VkDescriptorPoolCreateInfo *create_info,
VkDescriptorPool pool, bool is_internal);
void radv_rmv_log_graphics_pipeline_create(struct radv_device *device, VkPipelineCreateFlags flags,
struct radv_pipeline *pipeline, bool is_internal);
void radv_rmv_log_compute_pipeline_create(struct radv_device *device, VkPipelineCreateFlags flags,
struct radv_pipeline *pipeline, bool is_internal);
-void radv_rmv_log_event_create(struct radv_device *device, VkEvent event, VkEventCreateFlags flags,
- bool is_internal);
+void radv_rmv_log_event_create(struct radv_device *device, VkEvent event, VkEventCreateFlags flags, bool is_internal);
void radv_rmv_log_resource_destroy(struct radv_device *device, uint64_t handle);
void radv_rmv_log_submit(struct radv_device *device, enum amd_ip_type type);
-void radv_rmv_fill_device_info(const struct radv_physical_device *device,
- struct vk_rmv_device_info *info);
+void radv_rmv_fill_device_info(const struct radv_physical_device *device, struct vk_rmv_device_info *info);
void radv_rmv_collect_trace_events(struct radv_device *device);
void radv_memory_trace_finish(struct radv_device *device);
VkResult radv_create_buffer(struct radv_device *device, const VkBufferCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer,
- bool is_internal);
+ const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer, bool is_internal);
VkResult radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAllocateInfo,
- const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem,
- bool is_internal);
-VkResult radv_create_query_pool(struct radv_device *device,
- const VkQueryPoolCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator, VkQueryPool *pQueryPool,
- bool is_internal);
-VkResult radv_create_descriptor_pool(struct radv_device *device,
- const VkDescriptorPoolCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
- VkDescriptorPool *pDescriptorPool, bool is_internal);
+ const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem, bool is_internal);
+VkResult radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkQueryPool *pQueryPool, bool is_internal);
+VkResult radv_create_descriptor_pool(struct radv_device *device, const VkDescriptorPoolCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkDescriptorPool *pDescriptorPool,
+ bool is_internal);
VkResult radv_create_event(struct radv_device *device, const VkEventCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator, VkEvent *pEvent,
- bool is_internal);
+ const VkAllocationCallbacks *pAllocator, VkEvent *pEvent, bool is_internal);
/* radv_sqtt_layer.c */
struct radv_barrier_data {
void radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_draw(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z);
-void radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer,
- VkImageAspectFlagBits aspects);
+void radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlagBits aspects);
void radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
-void radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer,
- enum rgp_barrier_reason reason);
+void radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, enum rgp_barrier_reason reason);
void radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer);
-void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_barrier_data *barrier);
+void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, const struct radv_barrier_data *barrier);
-void radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer,
- struct radv_graphics_pipeline *pipeline);
+void radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv_graphics_pipeline *pipeline);
struct radv_indirect_command_layout {
struct vk_object_base base;
uint32_t radv_get_indirect_cmdbuf_size(const VkGeneratedCommandsInfoNV *cmd_info);
-void radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer,
- const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo);
+void radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo);
static inline uint32_t
si_conv_prim_to_gs_out(uint32_t topology, bool is_ngg)
case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
case VK_BLEND_FACTOR_CONSTANT_COLOR:
- return gfx_level >= GFX11 ? V_028780_BLEND_CONSTANT_COLOR_GFX11
- : V_028780_BLEND_CONSTANT_COLOR_GFX6;
+ return gfx_level >= GFX11 ? V_028780_BLEND_CONSTANT_COLOR_GFX11 : V_028780_BLEND_CONSTANT_COLOR_GFX6;
case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
return gfx_level >= GFX11 ? V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX11
- : V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX6;
+ : V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX6;
case VK_BLEND_FACTOR_CONSTANT_ALPHA:
- return gfx_level >= GFX11 ? V_028780_BLEND_CONSTANT_ALPHA_GFX11
- : V_028780_BLEND_CONSTANT_ALPHA_GFX6;
+ return gfx_level >= GFX11 ? V_028780_BLEND_CONSTANT_ALPHA_GFX11 : V_028780_BLEND_CONSTANT_ALPHA_GFX6;
case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
return gfx_level >= GFX11 ? V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX11
- : V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX6;
+ : V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX6;
case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
return V_028780_BLEND_SRC_ALPHA_SATURATE;
case VK_BLEND_FACTOR_SRC1_COLOR:
return gfx_level >= GFX11 ? V_028780_BLEND_SRC1_COLOR_GFX11 : V_028780_BLEND_SRC1_COLOR_GFX6;
case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
- return gfx_level >= GFX11 ? V_028780_BLEND_INV_SRC1_COLOR_GFX11
- : V_028780_BLEND_INV_SRC1_COLOR_GFX6;
+ return gfx_level >= GFX11 ? V_028780_BLEND_INV_SRC1_COLOR_GFX11 : V_028780_BLEND_INV_SRC1_COLOR_GFX6;
case VK_BLEND_FACTOR_SRC1_ALPHA:
return gfx_level >= GFX11 ? V_028780_BLEND_SRC1_ALPHA_GFX11 : V_028780_BLEND_SRC1_ALPHA_GFX6;
case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
- return gfx_level >= GFX11 ? V_028780_BLEND_INV_SRC1_ALPHA_GFX11
- : V_028780_BLEND_INV_SRC1_ALPHA_GFX6;
+ return gfx_level >= GFX11 ? V_028780_BLEND_INV_SRC1_ALPHA_GFX11 : V_028780_BLEND_INV_SRC1_ALPHA_GFX6;
default:
return 0;
}
case VK_BLEND_FACTOR_ONE:
return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
case VK_BLEND_FACTOR_SRC_COLOR:
- return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
- : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
+ return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0 : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
- return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
- : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
+ return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1 : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
case VK_BLEND_FACTOR_SRC_ALPHA:
return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
- return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
- : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
+ return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
default:
return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
}
si_blend_factor_uses_dst(VkBlendFactor factor)
{
return factor == VK_BLEND_FACTOR_DST_COLOR || factor == VK_BLEND_FACTOR_DST_ALPHA ||
- factor == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
- factor == VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA ||
+ factor == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || factor == VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA ||
factor == VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR;
}
struct radv_streamout_state *so = &cmd_buffer->state.streamout;
/* Streamout must be enabled for the PRIMITIVES_GENERATED query to work. */
- return (so->streamout_enabled || cmd_buffer->state.active_prims_gen_queries) &&
- !cmd_buffer->state.suspend_streamout;
+ return (so->streamout_enabled || cmd_buffer->state.active_prims_gen_queries) && !cmd_buffer->state.suspend_streamout;
}
/*
static inline bool
radv_has_shader_buffer_float_minmax(const struct radv_physical_device *pdevice, unsigned bitsize)
{
- return (pdevice->rad_info.gfx_level <= GFX7 && !pdevice->use_llvm) ||
- pdevice->rad_info.gfx_level == GFX10 || pdevice->rad_info.gfx_level == GFX10_3 ||
- (pdevice->rad_info.gfx_level == GFX11 && bitsize == 32);
+ return (pdevice->rad_info.gfx_level <= GFX7 && !pdevice->use_llvm) || pdevice->rad_info.gfx_level == GFX10 ||
+ pdevice->rad_info.gfx_level == GFX10_3 || (pdevice->rad_info.gfx_level == GFX11 && bitsize == 32);
}
/* radv_perfcounter.c */
void radv_perfcounter_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders);
void radv_perfcounter_emit_spm_reset(struct radeon_cmdbuf *cs);
-void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf *cs,
- int family);
-void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs,
- int family);
+void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf *cs, int family);
+void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family);
/* radv_spm.c */
bool radv_spm_init(struct radv_device *device);
void radv_spm_finish(struct radv_device *device);
void radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs);
-void radv_destroy_graphics_pipeline(struct radv_device *device,
- struct radv_graphics_pipeline *pipeline);
-void radv_destroy_graphics_lib_pipeline(struct radv_device *device,
- struct radv_graphics_lib_pipeline *pipeline);
-void radv_destroy_compute_pipeline(struct radv_device *device,
- struct radv_compute_pipeline *pipeline);
-void radv_destroy_ray_tracing_pipeline(struct radv_device *device,
- struct radv_ray_tracing_pipeline *pipeline);
+void radv_destroy_graphics_pipeline(struct radv_device *device, struct radv_graphics_pipeline *pipeline);
+void radv_destroy_graphics_lib_pipeline(struct radv_device *device, struct radv_graphics_lib_pipeline *pipeline);
+void radv_destroy_compute_pipeline(struct radv_device *device, struct radv_compute_pipeline *pipeline);
+void radv_destroy_ray_tracing_pipeline(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline);
-#define RADV_FROM_HANDLE(__radv_type, __name, __handle) \
- VK_FROM_HANDLE(__radv_type, __name, __handle)
+#define RADV_FROM_HANDLE(__radv_type, __name, __handle) VK_FROM_HANDLE(__radv_type, __name, __handle)
-VK_DEFINE_HANDLE_CASTS(radv_cmd_buffer, vk.base, VkCommandBuffer,
- VK_OBJECT_TYPE_COMMAND_BUFFER)
+VK_DEFINE_HANDLE_CASTS(radv_cmd_buffer, vk.base, VkCommandBuffer, VK_OBJECT_TYPE_COMMAND_BUFFER)
VK_DEFINE_HANDLE_CASTS(radv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
VK_DEFINE_HANDLE_CASTS(radv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
-VK_DEFINE_HANDLE_CASTS(radv_physical_device, vk.base, VkPhysicalDevice,
- VK_OBJECT_TYPE_PHYSICAL_DEVICE)
+VK_DEFINE_HANDLE_CASTS(radv_physical_device, vk.base, VkPhysicalDevice, VK_OBJECT_TYPE_PHYSICAL_DEVICE)
VK_DEFINE_HANDLE_CASTS(radv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer, vk.base, VkBuffer, VK_OBJECT_TYPE_BUFFER)
-VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer_view, base, VkBufferView,
- VK_OBJECT_TYPE_BUFFER_VIEW)
-VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, base, VkDescriptorPool,
- VK_OBJECT_TYPE_DESCRIPTOR_POOL)
-VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set, header.base, VkDescriptorSet,
- VK_OBJECT_TYPE_DESCRIPTOR_SET)
+VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer_view, base, VkBufferView, VK_OBJECT_TYPE_BUFFER_VIEW)
+VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, base, VkDescriptorPool, VK_OBJECT_TYPE_DESCRIPTOR_POOL)
+VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set, header.base, VkDescriptorSet, VK_OBJECT_TYPE_DESCRIPTOR_SET)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set_layout, vk.base, VkDescriptorSetLayout,
VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
-VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, base,
- VkDescriptorUpdateTemplate,
+VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, base, VkDescriptorUpdateTemplate,
VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
-VK_DEFINE_NONDISP_HANDLE_CASTS(radv_device_memory, base, VkDeviceMemory,
- VK_OBJECT_TYPE_DEVICE_MEMORY)
+VK_DEFINE_NONDISP_HANDLE_CASTS(radv_device_memory, base, VkDeviceMemory, VK_OBJECT_TYPE_DEVICE_MEMORY)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
-VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image_view, vk.base, VkImageView,
- VK_OBJECT_TYPE_IMAGE_VIEW);
+VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image_view, vk.base, VkImageView, VK_OBJECT_TYPE_IMAGE_VIEW);
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_indirect_command_layout, base, VkIndirectCommandsLayoutNV,
VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV)
-VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline, base, VkPipeline,
- VK_OBJECT_TYPE_PIPELINE)
-VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_layout, base, VkPipelineLayout,
- VK_OBJECT_TYPE_PIPELINE_LAYOUT)
-VK_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, base, VkQueryPool,
- VK_OBJECT_TYPE_QUERY_POOL)
-VK_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, base, VkSampler,
- VK_OBJECT_TYPE_SAMPLER)
+VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline, base, VkPipeline, VK_OBJECT_TYPE_PIPELINE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_layout, base, VkPipelineLayout, VK_OBJECT_TYPE_PIPELINE_LAYOUT)
+VK_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, base, VkQueryPool, VK_OBJECT_TYPE_QUERY_POOL)
+VK_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, base, VkSampler, VK_OBJECT_TYPE_SAMPLER)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_video_session, vk.base, VkVideoSessionKHR, VK_OBJECT_TYPE_VIDEO_SESSION_KHR)
-VK_DEFINE_NONDISP_HANDLE_CASTS(radv_video_session_params, vk.base, VkVideoSessionParametersKHR, VK_OBJECT_TYPE_VIDEO_SESSION_PARAMETERS_KHR)
+VK_DEFINE_NONDISP_HANDLE_CASTS(radv_video_session_params, vk.base, VkVideoSessionParametersKHR,
+ VK_OBJECT_TYPE_VIDEO_SESSION_PARAMETERS_KHR)
#ifdef __cplusplus
}
#include <string.h>
#include "bvh/bvh.h"
+#include "meta/radv_meta.h"
#include "nir/nir_builder.h"
#include "util/u_atomic.h"
#include "vulkan/vulkan_core.h"
#include "radv_cs.h"
-#include "meta/radv_meta.h"
#include "radv_private.h"
#include "sid.h"
#include "vk_acceleration_structure.h"
}
static void
-radv_store_availability(nir_builder *b, nir_ssa_def *flags, nir_ssa_def *dst_buf,
- nir_ssa_def *offset, nir_ssa_def *value32)
+radv_store_availability(nir_builder *b, nir_ssa_def *flags, nir_ssa_def *dst_buf, nir_ssa_def *offset,
+ nir_ssa_def *value32)
{
nir_push_if(b, nir_test_mask(b, flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT));
b.shader->info.workgroup_size[0] = 64;
nir_variable *result = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "result");
- nir_variable *outer_counter =
- nir_local_variable_create(b.impl, glsl_int_type(), "outer_counter");
+ nir_variable *outer_counter = nir_local_variable_create(b.impl, glsl_int_type(), "outer_counter");
nir_variable *start = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "start");
nir_variable *end = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "end");
nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available");
nir_scoped_memory_barrier(&b, NIR_SCOPE_INVOCATION, NIR_MEMORY_ACQUIRE, nir_var_mem_ssbo);
nir_ssa_def *load_offset = nir_iadd_imm(&b, input_base, rb_avail_offset);
- nir_ssa_def *load = nir_load_ssbo(&b, 1, 32, src_buf, load_offset, .align_mul = 4,
- .access = ACCESS_COHERENT);
+ nir_ssa_def *load = nir_load_ssbo(&b, 1, 32, src_buf, load_offset, .align_mul = 4, .access = ACCESS_COHERENT);
nir_push_if(&b, nir_ige_imm(&b, load, 0x80000000));
{
radv_break_on_count(&b, outer_counter, nir_imm_int(&b, db_count));
nir_ssa_def *enabled_cond =
- nir_iand_imm(&b, nir_ishl(&b, nir_imm_int64(&b, 1), current_outer_count),
- enabled_rb_mask);
+ nir_iand_imm(&b, nir_ishl(&b, nir_imm_int64(&b, 1), current_outer_count), enabled_rb_mask);
nir_push_if(&b, nir_i2b(&b, enabled_cond));
nir_push_if(&b, nir_iand(&b, start_done, end_done));
nir_store_var(&b, result,
- nir_iadd(&b, nir_load_var(&b, result),
- nir_isub(&b, nir_load_var(&b, end), nir_load_var(&b, start))),
+ nir_iadd(&b, nir_load_var(&b, result), nir_isub(&b, nir_load_var(&b, end), nir_load_var(&b, start))),
0x1);
nir_push_else(&b, NULL);
/* Store the result if complete or if partial results have been requested. */
nir_ssa_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
- nir_ssa_def *result_size =
- nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
- nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT),
- nir_load_var(&b, available)));
+ nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
+ nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), nir_load_var(&b, available)));
nir_push_if(&b, result_is_64bit);
nir_push_else(&b, NULL);
- nir_store_ssbo(&b, nir_u2u32(&b, nir_load_var(&b, result)), dst_buf, output_base,
- .align_mul = 8);
+ nir_store_ssbo(&b, nir_u2u32(&b, nir_load_var(&b, result)), dst_buf, output_base, .align_mul = 8);
nir_pop_if(&b, NULL);
nir_pop_if(&b, NULL);
nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "pipeline_statistics_query");
b.shader->info.workgroup_size[0] = 64;
- nir_variable *output_offset =
- nir_local_variable_create(b.impl, glsl_int_type(), "output_offset");
- nir_variable *result =
- nir_local_variable_create(b.impl, glsl_int64_t_type(), "result");
+ nir_variable *output_offset = nir_local_variable_create(b.impl, glsl_int_type(), "output_offset");
+ nir_variable *result = nir_local_variable_create(b.impl, glsl_int64_t_type(), "result");
nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
nir_ssa_def *stats_mask = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 12);
nir_ssa_def *elem_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
nir_ssa_def *elem_count = nir_ushr_imm(&b, stats_mask, 16);
- radv_store_availability(&b, flags, dst_buf,
- nir_iadd(&b, output_base, nir_imul(&b, elem_count, elem_size)),
+ radv_store_availability(&b, flags, dst_buf, nir_iadd(&b, output_base, nir_imul(&b, elem_count, elem_size)),
available32);
nir_push_if(&b, nir_i2b(&b, available32));
VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT)));
{
/* Compute the GDS result if needed. */
- nir_ssa_def *gds_start_offset =
- nir_iadd(&b, input_base, nir_imm_int(&b, pipelinestat_block_size * 2));
+ nir_ssa_def *gds_start_offset = nir_iadd(&b, input_base, nir_imm_int(&b, pipelinestat_block_size * 2));
nir_ssa_def *gds_start = nir_load_ssbo(&b, 1, 64, src_buf, gds_start_offset);
- nir_ssa_def *gds_end_offset =
- nir_iadd(&b, input_base, nir_imm_int(&b, pipelinestat_block_size * 2 + 8));
+ nir_ssa_def *gds_end_offset = nir_iadd(&b, input_base, nir_imm_int(&b, pipelinestat_block_size * 2 + 8));
nir_ssa_def *gds_end = nir_load_ssbo(&b, 1, 64, src_buf, gds_end_offset);
nir_ssa_def *ngg_gds_result = nir_isub(&b, gds_end, gds_start);
nir_pop_if(&b, NULL);
- nir_store_var(&b, output_offset, nir_iadd(&b, nir_load_var(&b, output_offset), elem_size),
- 0x1);
+ nir_store_var(&b, output_offset, nir_iadd(&b, nir_load_var(&b, output_offset), elem_size), 0x1);
nir_pop_if(&b, NULL);
}
b.shader->info.workgroup_size[0] = 64;
/* Create and initialize local variables. */
- nir_variable *result =
- nir_local_variable_create(b.impl, glsl_vector_type(GLSL_TYPE_UINT64, 2), "result");
+ nir_variable *result = nir_local_variable_create(b.impl, glsl_vector_type(GLSL_TYPE_UINT64, 2), "result");
nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available");
nir_store_var(&b, result, nir_replicate(&b, nir_imm_int64(&b, 0), 2), 0x3);
/* Load data from the query pool. */
nir_ssa_def *load1 = nir_load_ssbo(&b, 4, 32, src_buf, input_base, .align_mul = 32);
- nir_ssa_def *load2 =
- nir_load_ssbo(&b, 4, 32, src_buf, nir_iadd_imm(&b, input_base, 16), .align_mul = 16);
+ nir_ssa_def *load2 = nir_load_ssbo(&b, 4, 32, src_buf, nir_iadd_imm(&b, input_base, 16), .align_mul = 16);
/* Check if result is available. */
nir_ssa_def *avails[2];
avails[0] = nir_iand(&b, nir_channel(&b, load1, 1), nir_channel(&b, load1, 3));
avails[1] = nir_iand(&b, nir_channel(&b, load2, 1), nir_channel(&b, load2, 3));
- nir_ssa_def *result_is_available =
- nir_test_mask(&b, nir_iand(&b, avails[0], avails[1]), 0x80000000);
+ nir_ssa_def *result_is_available = nir_test_mask(&b, nir_iand(&b, avails[0], avails[1]), 0x80000000);
/* Only compute result if available. */
nir_push_if(&b, result_is_available);
/* Pack values. */
nir_ssa_def *packed64[4];
- packed64[0] =
- nir_pack_64_2x32(&b, nir_trim_vector(&b, load1, 2));
- packed64[1] =
- nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load1, 2), nir_channel(&b, load1, 3)));
- packed64[2] =
- nir_pack_64_2x32(&b, nir_trim_vector(&b, load2, 2));
- packed64[3] =
- nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load2, 2), nir_channel(&b, load2, 3)));
+ packed64[0] = nir_pack_64_2x32(&b, nir_trim_vector(&b, load1, 2));
+ packed64[1] = nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load1, 2), nir_channel(&b, load1, 3)));
+ packed64[2] = nir_pack_64_2x32(&b, nir_trim_vector(&b, load2, 2));
+ packed64[3] = nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load2, 2), nir_channel(&b, load2, 3)));
/* Compute result. */
nir_ssa_def *num_primitive_written = nir_isub(&b, packed64[3], packed64[1]);
/* Determine if result is 64 or 32 bit. */
nir_ssa_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
- nir_ssa_def *result_size =
- nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16), nir_imm_int(&b, 8));
+ nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16), nir_imm_int(&b, 8));
/* Store the result if complete or partial results have been requested. */
- nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT),
- nir_load_var(&b, available)));
+ nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), nir_load_var(&b, available)));
/* Store result. */
nir_push_if(&b, result_is_64bit);
/* Pack the timestamp. */
nir_ssa_def *timestamp;
- timestamp =
- nir_pack_64_2x32(&b, nir_trim_vector(&b, load, 2));
+ timestamp = nir_pack_64_2x32(&b, nir_trim_vector(&b, load, 2));
/* Check if result is available. */
nir_ssa_def *result_is_available = nir_i2b(&b, nir_ine_imm(&b, timestamp, TIMESTAMP_NOT_READY));
/* Determine if result is 64 or 32 bit. */
nir_ssa_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
- nir_ssa_def *result_size =
- nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
+ nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
/* Store the result if complete or partial results have been requested. */
- nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT),
- nir_load_var(&b, available)));
+ nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), nir_load_var(&b, available)));
/* Store result. */
nir_push_if(&b, result_is_64bit);
return b.shader;
}
-#define RADV_PGQ_STRIDE 32
+#define RADV_PGQ_STRIDE 32
#define RADV_PGQ_STRIDE_GDS (RADV_PGQ_STRIDE + 4 * 2)
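/* Layout note (inferred from the copy shader below, not authoritative): each
 * primitives-generated slot holds a begin/end pair of 64-bit counters at
 * offsets 0 and 16 (RADV_PGQ_STRIDE = 32 bytes per query), and the GDS variant
 * appends two 32-bit GDS counters (4 * 2 bytes) for the NGG emulation path. */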
static nir_shader *
b.shader->info.workgroup_size[0] = 64;
/* Create and initialize local variables. */
- nir_variable *result =
- nir_local_variable_create(b.impl, glsl_uint64_t_type(), "result");
+ nir_variable *result = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "result");
nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available");
nir_store_var(&b, result, nir_imm_int64(&b, 0), 0x1);
nir_ssa_def *global_id = get_global_ids(&b, 1);
/* Determine if the query pool uses GDS for NGG. */
- nir_ssa_def *uses_gds =
- nir_i2b(&b, nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20));
+ nir_ssa_def *uses_gds = nir_i2b(&b, nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20));
/* Compute src/dst strides. */
- nir_ssa_def *input_stride = nir_bcsel(&b, uses_gds, nir_imm_int(&b, RADV_PGQ_STRIDE_GDS), nir_imm_int(&b, RADV_PGQ_STRIDE));
+ nir_ssa_def *input_stride =
+ nir_bcsel(&b, uses_gds, nir_imm_int(&b, RADV_PGQ_STRIDE_GDS), nir_imm_int(&b, RADV_PGQ_STRIDE));
nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 16);
nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
/* Load data from the query pool. */
nir_ssa_def *load1 = nir_load_ssbo(&b, 2, 32, src_buf, input_base, .align_mul = 32);
- nir_ssa_def *load2 = nir_load_ssbo(
- &b, 2, 32, src_buf, nir_iadd(&b, input_base, nir_imm_int(&b, 16)), .align_mul = 16);
+ nir_ssa_def *load2 =
+ nir_load_ssbo(&b, 2, 32, src_buf, nir_iadd(&b, input_base, nir_imm_int(&b, 16)), .align_mul = 16);
/* Check if result is available. */
nir_ssa_def *avails[2];
/* Pack values. */
nir_ssa_def *packed64[2];
- packed64[0] =
- nir_pack_64_2x32(&b, nir_trim_vector(&b, load1, 2));
- packed64[1] =
- nir_pack_64_2x32(&b, nir_trim_vector(&b, load2, 2));
+ packed64[0] = nir_pack_64_2x32(&b, nir_trim_vector(&b, load1, 2));
+ packed64[1] = nir_pack_64_2x32(&b, nir_trim_vector(&b, load2, 2));
/* Compute result. */
nir_ssa_def *primitive_storage_needed = nir_isub(&b, packed64[1], packed64[0]);
/* Determine if result is 64 or 32 bit. */
nir_ssa_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
- nir_ssa_def *result_size =
- nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16), nir_imm_int(&b, 8));
+ nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16), nir_imm_int(&b, 8));
/* Store the result if complete or partial results have been requested. */
- nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT),
- nir_load_var(&b, available)));
+ nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), nir_load_var(&b, available)));
/* Store result. */
nir_push_if(&b, result_is_64bit);
}};
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &occlusion_ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.query.ds_layout);
+ &device->meta_state.alloc, &device->meta_state.query.ds_layout);
if (result != VK_SUCCESS)
goto fail;
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
};
- result =
- radv_CreatePipelineLayout(radv_device_to_handle(device), &occlusion_pl_create_info,
- &device->meta_state.alloc, &device->meta_state.query.p_layout);
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &occlusion_pl_create_info,
+ &device->meta_state.alloc, &device->meta_state.query.p_layout);
if (result != VK_SUCCESS)
goto fail;
.layout = device->meta_state.query.p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &occlusion_vk_pipeline_info, NULL,
- &device->meta_state.query.occlusion_query_pipeline);
+ result =
+ radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &occlusion_vk_pipeline_info,
+ NULL, &device->meta_state.query.occlusion_query_pipeline);
if (result != VK_SUCCESS)
goto fail;
.layout = device->meta_state.query.p_layout,
};
- result = radv_compute_pipeline_create(
- radv_device_to_handle(device), device->meta_state.cache,
- &pipeline_statistics_vk_pipeline_info, NULL,
- &device->meta_state.query.pipeline_statistics_query_pipeline);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
+ &pipeline_statistics_vk_pipeline_info, NULL,
+ &device->meta_state.query.pipeline_statistics_query_pipeline);
if (result != VK_SUCCESS)
goto fail;
.layout = device->meta_state.query.p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &tfb_pipeline_info, NULL,
- &device->meta_state.query.tfb_query_pipeline);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &tfb_pipeline_info,
+ NULL, &device->meta_state.query.tfb_query_pipeline);
if (result != VK_SUCCESS)
goto fail;
.layout = device->meta_state.query.p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &timestamp_pipeline_info, NULL,
- &device->meta_state.query.timestamp_query_pipeline);
+ result =
+ radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &timestamp_pipeline_info,
+ NULL, &device->meta_state.query.timestamp_query_pipeline);
if (result != VK_SUCCESS)
goto fail;
.layout = device->meta_state.query.p_layout,
};
- result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache,
- &pg_pipeline_info, NULL,
- &device->meta_state.query.pg_query_pipeline);
+ result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &pg_pipeline_info,
+ NULL, &device->meta_state.query.pg_query_pipeline);
fail:
ralloc_free(occlusion_cs);
radv_device_finish_meta_query_state(struct radv_device *device)
{
if (device->meta_state.query.tfb_query_pipeline)
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.query.tfb_query_pipeline, &device->meta_state.alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), device->meta_state.query.tfb_query_pipeline,
+ &device->meta_state.alloc);
if (device->meta_state.query.pipeline_statistics_query_pipeline)
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.query.pipeline_statistics_query_pipeline,
+ radv_DestroyPipeline(radv_device_to_handle(device), device->meta_state.query.pipeline_statistics_query_pipeline,
&device->meta_state.alloc);
if (device->meta_state.query.occlusion_query_pipeline)
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.query.occlusion_query_pipeline,
+ radv_DestroyPipeline(radv_device_to_handle(device), device->meta_state.query.occlusion_query_pipeline,
&device->meta_state.alloc);
if (device->meta_state.query.timestamp_query_pipeline)
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.query.timestamp_query_pipeline,
+ radv_DestroyPipeline(radv_device_to_handle(device), device->meta_state.query.timestamp_query_pipeline,
&device->meta_state.alloc);
if (device->meta_state.query.pg_query_pipeline)
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.query.pg_query_pipeline, &device->meta_state.alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), device->meta_state.query.pg_query_pipeline,
+ &device->meta_state.alloc);
if (device->meta_state.query.p_layout)
radv_DestroyPipelineLayout(radv_device_to_handle(device), device->meta_state.query.p_layout,
&device->meta_state.alloc);
if (device->meta_state.query.ds_layout)
- device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
- device->meta_state.query.ds_layout,
- &device->meta_state.alloc);
+ device->vk.dispatch_table.DestroyDescriptorSetLayout(
+ radv_device_to_handle(device), device->meta_state.query.ds_layout, &device->meta_state.alloc);
}
static void
-radv_query_shader(struct radv_cmd_buffer *cmd_buffer, VkPipeline *pipeline,
- struct radeon_winsys_bo *src_bo, struct radeon_winsys_bo *dst_bo,
- uint64_t src_offset, uint64_t dst_offset, uint32_t src_stride,
- uint32_t dst_stride, size_t dst_size, uint32_t count, uint32_t flags,
- uint32_t pipeline_stats_mask, uint32_t avail_offset, bool uses_gds)
+radv_query_shader(struct radv_cmd_buffer *cmd_buffer, VkPipeline *pipeline, struct radeon_winsys_bo *src_bo,
+ struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset, uint32_t src_stride,
+ uint32_t dst_stride, size_t dst_size, uint32_t count, uint32_t flags, uint32_t pipeline_stats_mask,
+ uint32_t avail_offset, bool uses_gds)
{
struct radv_device *device = cmd_buffer->device;
struct radv_meta_saved_state saved_state;
* affected by conditional rendering.
*/
radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS | RADV_META_SUSPEND_PREDICATING);
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS |
+ RADV_META_SUSPEND_PREDICATING);
uint64_t src_buffer_size = MAX2(src_stride * count, avail_offset + 4 * count - src_offset);
uint64_t dst_buffer_size = dst_stride * (count - 1) + dst_size;
radv_buffer_init(&src_buffer, device, src_bo, src_buffer_size, src_offset);
radv_buffer_init(&dst_buffer, device, dst_bo, dst_buffer_size, dst_offset);
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
- *pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
radv_meta_push_descriptor_set(
cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.query.p_layout, 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]){
- {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer),
- .offset = 0,
- .range = VK_WHOLE_SIZE}},
- {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&src_buffer),
- .offset = 0,
- .range = VK_WHOLE_SIZE}}});
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer),
+ .offset = 0,
+ .range = VK_WHOLE_SIZE}},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&src_buffer),
+ .offset = 0,
+ .range = VK_WHOLE_SIZE}}});
/* Encode the number of elements for easy access by the shader. */
pipeline_stats_mask &= 0x7ff;
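/* Sketch of the assumed packing, matching the shader above which recovers the
 * element count as stats_mask >> 16: the low 11 bits carry the enabled-counter
 * mask and the caller is expected to OR the popcount into the upper half, e.g.
 *
 *    push_constants.stats_mask = pipeline_stats_mask |
 *                                (util_bitcount(pipeline_stats_mask) << 16);
 *
 * (util_bitcount() is Mesa's population-count helper; the exact line is elided
 * here, so treat this as an illustration rather than the verbatim code.) */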
VkResult
radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator, VkQueryPool *pQueryPool,
- bool is_internal)
+ const VkAllocationCallbacks *pAllocator, VkQueryPool *pQueryPool, bool is_internal)
{
VkResult result;
size_t pool_struct_size = pCreateInfo->queryType == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR
? sizeof(struct radv_pc_query_pool)
: sizeof(struct radv_query_pool);
- struct radv_query_pool *pool = vk_alloc2(&device->vk.alloc, pAllocator, pool_struct_size, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ struct radv_query_pool *pool =
+ vk_alloc2(&device->vk.alloc, pAllocator, pool_struct_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!pool)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
* hardware if GS uses the legacy path. When NGG GS is used, the hardware can't know the number
* of generated primitives and we have to increment it from the shader using a plain GDS atomic.
*/
- pool->uses_gds = (device->physical_device->emulate_ngg_gs_query_pipeline_stat &&
- (pool->pipeline_stats_mask & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT)) ||
- (device->physical_device->use_ngg &&
- pCreateInfo->queryType == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT);
+ pool->uses_gds =
+ (device->physical_device->emulate_ngg_gs_query_pipeline_stat &&
+ (pool->pipeline_stats_mask & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT)) ||
+ (device->physical_device->use_ngg && pCreateInfo->queryType == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT);
switch (pCreateInfo->queryType) {
case VK_QUERY_TYPE_OCCLUSION:
}
break;
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
- result = radv_pc_init_query_pool(device->physical_device, pCreateInfo,
- (struct radv_pc_query_pool *)pool);
+ result = radv_pc_init_query_pool(device->physical_device, pCreateInfo, (struct radv_pc_query_pool *)pool);
if (result != VK_SUCCESS) {
radv_destroy_query_pool(device, pAllocator, pool);
pool->size += 4 * pCreateInfo->queryCount;
result = device->ws->buffer_create(device->ws, pool->size, 64, RADEON_DOMAIN_GTT,
- RADEON_FLAG_NO_INTERPROCESS_SHARING,
- RADV_BO_PRIORITY_QUERY_POOL, 0, &pool->bo);
+ RADEON_FLAG_NO_INTERPROCESS_SHARING, RADV_BO_PRIORITY_QUERY_POOL, 0, &pool->bo);
if (result != VK_SUCCESS) {
radv_destroy_query_pool(device, pAllocator, pool);
return vk_error(device, result);
}
VKAPI_ATTR VkResult VKAPI_CALL
-radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery,
- uint32_t queryCount, size_t dataSize, void *pData, VkDeviceSize stride,
- VkQueryResultFlags flags)
+radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount,
+ size_t dataSize, void *pData, VkDeviceSize stride, VkQueryResultFlags flags)
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
do {
start = p_atomic_read(src64 + 2 * i);
end = p_atomic_read(src64 + 2 * i + 1);
- } while ((!(start & (1ull << 63)) || !(end & (1ull << 63))) &&
- (flags & VK_QUERY_RESULT_WAIT_BIT));
+ } while ((!(start & (1ull << 63)) || !(end & (1ull << 63))) && (flags & VK_QUERY_RESULT_WAIT_BIT));
if (!(start & (1ull << 63)) || !(end & (1ull << 63)))
available = 0;
}
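/* Encoding assumed by the wait loop above: the hardware writes each begin/end
 * ZPASS counter as a 64-bit value with bit 63 set once it has landed, so the
 * top bit doubles as a per-counter availability flag and a clear bit means the
 * occlusion result is not ready yet. */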
case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(device);
- const uint32_t *avail_ptr =
- (const uint32_t *)(pool->ptr + pool->availability_offset + 4 * query);
+ const uint32_t *avail_ptr = (const uint32_t *)(pool->ptr + pool->availability_offset + 4 * query);
uint64_t ngg_gds_result = 0;
do {
for (int i = 0; i < ARRAY_SIZE(pipeline_statistics_indices); ++i) {
if (pool->pipeline_stats_mask & (1u << i)) {
if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT)) {
- *dst = stop[pipeline_statistics_indices[i]] -
- start[pipeline_statistics_indices[i]];
+ *dst = stop[pipeline_statistics_indices[i]] - start[pipeline_statistics_indices[i]];
- if (pool->uses_gds &&
- (1u << i) == VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT) {
+ if (pool->uses_gds && (1u << i) == VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT) {
*dst += ngg_gds_result;
}
}
for (int i = 0; i < ARRAY_SIZE(pipeline_statistics_indices); ++i) {
if (pool->pipeline_stats_mask & (1u << i)) {
if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT)) {
- *dst = stop[pipeline_statistics_indices[i]] -
- start[pipeline_statistics_indices[i]];
+ *dst = stop[pipeline_statistics_indices[i]] - start[pipeline_statistics_indices[i]];
- if (pool->uses_gds &&
- (1u << i) == VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT) {
+ if (pool->uses_gds && (1u << i) == VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT) {
*dst += ngg_gds_result;
}
}
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
- uint32_t firstQuery, uint32_t queryCount, VkBuffer dstBuffer,
- VkDeviceSize dstOffset, VkDeviceSize stride, VkQueryResultFlags flags)
+radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery,
+ uint32_t queryCount, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize stride,
+ VkQueryResultFlags flags)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
switch (pool->type) {
case VK_QUERY_TYPE_OCCLUSION:
- radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.occlusion_query_pipeline,
- pool->bo, dst_buffer->bo, firstQuery * pool->stride,
- dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, queryCount,
- flags, 0, 0, false);
+ radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.occlusion_query_pipeline, pool->bo,
+ dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride,
+ dst_size, queryCount, flags, 0, 0, false);
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, avail_va, 1, 0xffffffff);
}
}
- radv_query_shader(
- cmd_buffer, &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline,
- pool->bo, dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset,
- pool->stride, stride, dst_size, queryCount, flags, pool->pipeline_stats_mask,
- pool->availability_offset + 4 * firstQuery, pool->uses_gds);
+ radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline, pool->bo,
+ dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride,
+ dst_size, queryCount, flags, pool->pipeline_stats_mask,
+ pool->availability_offset + 4 * firstQuery, pool->uses_gds);
break;
case VK_QUERY_TYPE_TIMESTAMP:
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
/* Wait on the high 32 bits of the timestamp in
* case the low part is 0xffffffff.
*/
- radv_cp_wait_mem(cs, WAIT_REG_MEM_NOT_EQUAL, local_src_va + 4,
- TIMESTAMP_NOT_READY >> 32, 0xffffffff);
+ radv_cp_wait_mem(cs, WAIT_REG_MEM_NOT_EQUAL, local_src_va + 4, TIMESTAMP_NOT_READY >> 32, 0xffffffff);
}
}
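/* TIMESTAMP_NOT_READY is the poison value the pool is reset to (UINT64_MAX in
 * this driver), and a real GPU timestamp never reaches a high dword of
 * 0xffffffff in practice, so waiting on the upper 32 bits alone is sufficient
 * to detect a written timestamp. */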
- radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.timestamp_query_pipeline,
- pool->bo, dst_buffer->bo, firstQuery * pool->stride,
- dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, queryCount,
- flags, 0, 0, false);
+ radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.timestamp_query_pipeline, pool->bo,
+ dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride,
+ dst_size, queryCount, flags, 0, 0, false);
break;
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
/* Wait on the upper word of all results. */
for (unsigned j = 0; j < 4; j++, src_va += 8) {
- radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 0x80000000,
- 0xffffffff);
+ radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 0x80000000, 0xffffffff);
}
}
}
- radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.tfb_query_pipeline,
- pool->bo, dst_buffer->bo, firstQuery * pool->stride,
- dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, queryCount,
- flags, 0, 0, false);
+ radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.tfb_query_pipeline, pool->bo, dst_buffer->bo,
+ firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride, dst_size,
+ queryCount, flags, 0, 0, false);
break;
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
}
}
- radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pg_query_pipeline,
- pool->bo, dst_buffer->bo, firstQuery * pool->stride,
- dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, queryCount,
- flags, 0, 0, pool->uses_gds && cmd_buffer->device->physical_device->rad_info.gfx_level < GFX11);
+ radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pg_query_pipeline, pool->bo, dst_buffer->bo,
+ firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride, dst_size,
+ queryCount, flags, 0, 0,
+ pool->uses_gds && cmd_buffer->device->physical_device->rad_info.gfx_level < GFX11);
break;
default:
unreachable("trying to get results of unhandled query type");
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery,
- uint32_t queryCount)
+radv_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
*/
cmd_buffer->state.flush_bits |= cmd_buffer->active_query_flush_bits;
- flush_bits |= radv_fill_buffer(cmd_buffer, NULL, pool->bo,
- radv_buffer_get_va(pool->bo) + firstQuery * pool->stride,
+ flush_bits |= radv_fill_buffer(cmd_buffer, NULL, pool->bo, radv_buffer_get_va(pool->bo) + firstQuery * pool->stride,
queryCount * pool->stride, value);
if (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
flush_bits |=
radv_fill_buffer(cmd_buffer, NULL, pool->bo,
- radv_buffer_get_va(pool->bo) + pool->availability_offset + firstQuery * 4,
- queryCount * 4, 0);
+ radv_buffer_get_va(pool->bo) + pool->availability_offset + firstQuery * 4, queryCount * 4, 0);
}
if (flush_bits) {
}
VKAPI_ATTR void VKAPI_CALL
-radv_ResetQueryPool(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery,
- uint32_t queryCount)
+radv_ResetQueryPool(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount)
{
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
si_emit_cache_flush(cmd_buffer);
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_GDS) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
- COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_GDS) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM);
radeon_emit(cs, gds_offset);
radeon_emit(cs, 0);
radeon_emit(cs, va);
}
static void
-emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va,
- VkQueryType query_type, VkQueryControlFlags flags, uint32_t index)
+emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va, VkQueryType query_type,
+ VkQueryControlFlags flags, uint32_t index)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
switch (query_type) {
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_OCCLUSION_QUERY;
} else {
- if ((flags & VK_QUERY_CONTROL_PRECISE_BIT) &&
- !cmd_buffer->state.perfect_occlusion_queries_enabled) {
+ if ((flags & VK_QUERY_CONTROL_PRECISE_BIT) && !cmd_buffer->state.perfect_occlusion_queries_enabled) {
/* This is not the first query, but this one
* needs to enable precision, so DB_COUNT_CONTROL
* has to be updated accordingly.
}
static void
-emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va,
- uint64_t avail_va, VkQueryType query_type, uint32_t index)
+emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va, uint64_t avail_va,
+ VkQueryType query_type, uint32_t index)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
switch (query_type) {
radeon_emit(cs, va >> 32);
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
- radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS,
- 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, avail_va, 1,
- cmd_buffer->gfx9_eop_bug_va);
+ radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
+ EOP_DATA_SEL_VALUE_32BIT, avail_va, 1, cmd_buffer->gfx9_eop_bug_va);
if (pool->uses_gds) {
va += pipelinestat_block_size + 8;
unreachable("ending unhandled query type");
}
- cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 |
- RADV_CMD_FLAG_INV_VCACHE;
+ cmd_buffer->active_query_flush_bits |=
+ RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE;
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) {
- cmd_buffer->active_query_flush_bits |=
- RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+ cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
}
}
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query,
- VkQueryControlFlags flags)
+radv_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, VkQueryControlFlags flags)
{
radv_CmdBeginQueryIndexedEXT(commandBuffer, queryPool, query, flags, 0);
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query,
- uint32_t index)
+radv_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, uint32_t index)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage,
- VkQueryPool queryPool, uint32_t query)
+radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, VkQueryPool queryPool,
+ uint32_t query)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
/* Make sure previously launched waves have finished */
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
}
-
+
si_emit_cache_flush(cmd_buffer);
int num_queries = 1;
for (unsigned i = 0; i < num_queries; i++) {
if (stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT) {
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM |
- COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) | COPY_DATA_DST_SEL(V_370_MEM));
+ radeon_emit(cs, COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM | COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) |
+ COPY_DATA_DST_SEL(V_370_MEM));
radeon_emit(cs, 0);
radeon_emit(cs, 0);
radeon_emit(cs, query_va);
radeon_emit(cs, query_va >> 32);
} else {
- si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
- mec, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
- EOP_DATA_SEL_TIMESTAMP, query_va, 0,
+ si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, mec,
+ V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_TIMESTAMP, query_va, 0,
cmd_buffer->gfx9_eop_bug_va);
}
query_va += pool->stride;
}
- cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 |
- RADV_CMD_FLAG_INV_VCACHE;
+ cmd_buffer->active_query_flush_bits |=
+ RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE;
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) {
- cmd_buffer->active_query_flush_bits |=
- RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+ cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
}
assert(cmd_buffer->cs->cdw <= cdw_max);
}
VKAPI_ATTR void VKAPI_CALL
-radv_CmdWriteAccelerationStructuresPropertiesKHR(
- VkCommandBuffer commandBuffer, uint32_t accelerationStructureCount,
- const VkAccelerationStructureKHR *pAccelerationStructures, VkQueryType queryType,
- VkQueryPool queryPool, uint32_t firstQuery)
+radv_CmdWriteAccelerationStructuresPropertiesKHR(VkCommandBuffer commandBuffer, uint32_t accelerationStructureCount,
+ const VkAccelerationStructureKHR *pAccelerationStructures,
+ VkQueryType queryType, VkQueryPool queryPool, uint32_t firstQuery)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
si_emit_cache_flush(cmd_buffer);
- ASSERTED unsigned cdw_max =
- radeon_check_space(cmd_buffer->device->ws, cs, 6 * accelerationStructureCount);
+ ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 6 * accelerationStructureCount);
for (uint32_t i = 0; i < accelerationStructureCount; ++i) {
RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, pAccelerationStructures[i]);
#include "radv_cs.h"
#include "radv_debug.h"
#include "radv_private.h"
-#include "vk_sync.h"
#include "vk_semaphore.h"
+#include "vk_sync.h"
enum radeon_ctx_priority
radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoKHR *pObj)
}
}
if (size) {
- result = device->ws->buffer_virtual_bind(device->ws, buffer->bo, resourceOffset, size,
- mem ? mem->bo : NULL, memoryOffset);
+ result = device->ws->buffer_virtual_bind(device->ws, buffer->bo, resourceOffset, size, mem ? mem->bo : NULL,
+ memoryOffset);
if (result != VK_SUCCESS)
return result;
memoryOffset = bind->pBinds[i].memoryOffset;
}
if (size) {
- result = device->ws->buffer_virtual_bind(device->ws, buffer->bo, resourceOffset, size,
- mem ? mem->bo : NULL, memoryOffset);
+ result = device->ws->buffer_virtual_bind(device->ws, buffer->bo, resourceOffset, size, mem ? mem->bo : NULL,
+ memoryOffset);
if (mem)
radv_rmv_log_sparse_add_residency(device, buffer->bo, memoryOffset);
}
static VkResult
-radv_sparse_image_opaque_bind_memory(struct radv_device *device,
- const VkSparseImageOpaqueMemoryBindInfo *bind)
+radv_sparse_image_opaque_bind_memory(struct radv_device *device, const VkSparseImageOpaqueMemoryBindInfo *bind)
{
RADV_FROM_HANDLE(radv_image, image, bind->image);
VkResult result;
if (bind->pBinds[i].memory != VK_NULL_HANDLE)
mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
- result = device->ws->buffer_virtual_bind(device->ws, image->bindings[0].bo,
- bind->pBinds[i].resourceOffset, bind->pBinds[i].size,
- mem ? mem->bo : NULL, bind->pBinds[i].memoryOffset);
+ result =
+ device->ws->buffer_virtual_bind(device->ws, image->bindings[0].bo, bind->pBinds[i].resourceOffset,
+ bind->pBinds[i].size, mem ? mem->bo : NULL, bind->pBinds[i].memoryOffset);
if (result != VK_SUCCESS)
return result;
if (bind->pBinds[i].memory)
- radv_rmv_log_sparse_add_residency(device, image->bindings[0].bo,
- bind->pBinds[i].resourceOffset);
+ radv_rmv_log_sparse_add_residency(device, image->bindings[0].bo, bind->pBinds[i].resourceOffset);
else
- radv_rmv_log_sparse_remove_residency(device, image->bindings[0].bo,
- bind->pBinds[i].resourceOffset);
+ radv_rmv_log_sparse_remove_residency(device, image->bindings[0].bo, bind->pBinds[i].resourceOffset);
}
return VK_SUCCESS;
const uint32_t level = bind->pBinds[i].subresource.mipLevel;
VkExtent3D bind_extent = bind->pBinds[i].extent;
- bind_extent.width =
- DIV_ROUND_UP(bind_extent.width, vk_format_get_blockwidth(image->vk.format));
- bind_extent.height =
- DIV_ROUND_UP(bind_extent.height, vk_format_get_blockheight(image->vk.format));
+ bind_extent.width = DIV_ROUND_UP(bind_extent.width, vk_format_get_blockwidth(image->vk.format));
+ bind_extent.height = DIV_ROUND_UP(bind_extent.height, vk_format_get_blockheight(image->vk.format));
VkOffset3D bind_offset = bind->pBinds[i].offset;
bind_offset.x /= vk_format_get_blockwidth(image->vk.format);
pitch = surface->u.legacy.level[level].nblk_x;
}
- offset += bind_offset.z * depth_pitch +
- ((uint64_t)bind_offset.y * pitch * surface->prt_tile_depth +
- (uint64_t)bind_offset.x * surface->prt_tile_height * surface->prt_tile_depth) *
- bs;
+ offset +=
+ bind_offset.z * depth_pitch + ((uint64_t)bind_offset.y * pitch * surface->prt_tile_depth +
+ (uint64_t)bind_offset.x * surface->prt_tile_height * surface->prt_tile_depth) *
+ bs;
uint32_t aligned_extent_width = ALIGN(bind_extent.width, surface->prt_tile_width);
uint32_t aligned_extent_height = ALIGN(bind_extent.height, surface->prt_tile_height);
uint32_t aligned_extent_depth = ALIGN(bind_extent.depth, surface->prt_tile_depth);
- bool whole_subres =
- (bind_extent.height <= surface->prt_tile_height || aligned_extent_width == pitch) &&
- (bind_extent.depth <= surface->prt_tile_depth ||
- (uint64_t)aligned_extent_width * aligned_extent_height * bs == depth_pitch);
+ bool whole_subres = (bind_extent.height <= surface->prt_tile_height || aligned_extent_width == pitch) &&
+ (bind_extent.depth <= surface->prt_tile_depth ||
+ (uint64_t)aligned_extent_width * aligned_extent_height * bs == depth_pitch);
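/* If the bound region covers whole rows (aligned width equals the pitch) and
 * whole slices, it is one contiguous span in the image, so a single
 * buffer_virtual_bind call below suffices; otherwise the code falls back to
 * binding one row of tiles at a time. */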
if (whole_subres) {
uint64_t size = (uint64_t)aligned_extent_width * aligned_extent_height * aligned_extent_depth * bs;
- result = device->ws->buffer_virtual_bind(device->ws, image->bindings[0].bo, offset, size,
- mem ? mem->bo : NULL, mem_offset);
+ result = device->ws->buffer_virtual_bind(device->ws, image->bindings[0].bo, offset, size, mem ? mem->bo : NULL,
+ mem_offset);
if (result != VK_SUCCESS)
return result;
z += surface->prt_tile_depth, offset += depth_pitch * surface->prt_tile_depth) {
for (unsigned y = 0; y < bind_extent.height; y += surface->prt_tile_height) {
result = device->ws->buffer_virtual_bind(
- device->ws, image->bindings[0].bo, offset + (uint64_t)img_y_increment * y, size,
- mem ? mem->bo : NULL, mem_offset + (uint64_t)mem_y_increment * y + mem_z_increment * z);
+ device->ws, image->bindings[0].bo, offset + (uint64_t)img_y_increment * y, size, mem ? mem->bo : NULL,
+ mem_offset + (uint64_t)mem_y_increment * y + mem_z_increment * z);
if (result != VK_SUCCESS)
return result;
}
for (uint32_t i = 0; i < submission->image_opaque_bind_count; ++i) {
- VkResult result =
- radv_sparse_image_opaque_bind_memory(device, submission->image_opaque_binds + i);
+ VkResult result = radv_sparse_image_opaque_bind_memory(device, submission->image_opaque_binds + i);
if (result != VK_SUCCESS)
return result;
}
}
static void
-radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sample_positions,
- uint32_t esgs_ring_size, struct radeon_winsys_bo *esgs_ring_bo,
- uint32_t gsvs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo,
- struct radeon_winsys_bo *tess_rings_bo,
- struct radeon_winsys_bo *task_rings_bo,
- struct radeon_winsys_bo *mesh_scratch_ring_bo, uint32_t attr_ring_size,
- struct radeon_winsys_bo *attr_ring_bo)
+radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sample_positions, uint32_t esgs_ring_size,
+ struct radeon_winsys_bo *esgs_ring_bo, uint32_t gsvs_ring_size,
+ struct radeon_winsys_bo *gsvs_ring_bo, struct radeon_winsys_bo *tess_rings_bo,
+ struct radeon_winsys_bo *task_rings_bo, struct radeon_winsys_bo *mesh_scratch_ring_bo,
+ uint32_t attr_ring_size, struct radeon_winsys_bo *attr_ring_bo)
{
uint32_t *desc = &map[4];
desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX6(1);
if (device->physical_device->rad_info.gfx_level >= GFX11) {
- desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
+ desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else if (device->physical_device->rad_info.gfx_level >= GFX10) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
} else if (device->physical_device->rad_info.gfx_level >= GFX8) {
/* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */
- desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(0) |
- S_008F0C_ELEMENT_SIZE(1);
+ desc[3] |=
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(0) | S_008F0C_ELEMENT_SIZE(1);
} else {
desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->rad_info.gfx_level >= GFX11) {
- desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
+ desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else if (device->physical_device->rad_info.gfx_level >= GFX10) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
} else {
- desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ desc[7] |=
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
}
}
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->rad_info.gfx_level >= GFX11) {
- desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
+ desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else if (device->physical_device->rad_info.gfx_level >= GFX10) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
} else {
- desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ desc[3] |=
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
}
/* stride gsvs_itemsize, num records 64
desc[5] |= S_008F04_SWIZZLE_ENABLE_GFX6(1);
if (device->physical_device->rad_info.gfx_level >= GFX11) {
- desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
+ desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else if (device->physical_device->rad_info.gfx_level >= GFX10) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
} else if (device->physical_device->rad_info.gfx_level >= GFX8) {
/* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */
- desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(0) |
- S_008F0C_ELEMENT_SIZE(1);
+ desc[7] |=
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(0) | S_008F0C_ELEMENT_SIZE(1);
} else {
desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->rad_info.gfx_level >= GFX11) {
- desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
+ desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
} else if (device->physical_device->rad_info.gfx_level >= GFX10) {
- desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
+ desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
+ S_008F0C_RESOURCE_LEVEL(1);
} else {
- desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ desc[3] |=
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
}
desc[4] = tess_offchip_va;
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->rad_info.gfx_level >= GFX11) {
- desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
+ desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
} else if (device->physical_device->rad_info.gfx_level >= GFX10) {
- desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
+ desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
+ S_008F0C_RESOURCE_LEVEL(1);
} else {
- desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ desc[7] |=
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
}
}
if (task_rings_bo) {
uint64_t task_va = radv_buffer_get_va(task_rings_bo);
uint64_t task_draw_ring_va = task_va + device->physical_device->task_info.draw_ring_offset;
- uint64_t task_payload_ring_va =
- task_va + device->physical_device->task_info.payload_ring_offset;
+ uint64_t task_payload_ring_va = task_va + device->physical_device->task_info.payload_ring_offset;
desc[0] = task_draw_ring_va;
desc[1] = S_008F04_BASE_ADDRESS_HI(task_draw_ring_va >> 32);
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->rad_info.gfx_level >= GFX11) {
- desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
+ desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else {
assert(device->physical_device->rad_info.gfx_level >= GFX10_3);
- desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
+ desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
+ S_008F0C_RESOURCE_LEVEL(1);
}
desc[4] = task_payload_ring_va;
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->rad_info.gfx_level >= GFX11) {
- desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
+ desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else {
assert(device->physical_device->rad_info.gfx_level >= GFX10_3);
- desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
+ desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
+ S_008F0C_RESOURCE_LEVEL(1);
}
}
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->rad_info.gfx_level >= GFX11) {
- desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
+ desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else {
assert(device->physical_device->rad_info.gfx_level >= GFX10_3);
- desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
+ desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
+ S_008F0C_RESOURCE_LEVEL(1);
}
}
desc[2] = attr_ring_size;
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_32_32_32_FLOAT) |
- S_008F0C_INDEX_STRIDE(2) /* 32 elements */;
+ S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_32_32_32_FLOAT) | S_008F0C_INDEX_STRIDE(2) /* 32 elements */;
}
desc += 4;
}
static void
-radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs,
- struct radeon_winsys_bo *esgs_ring_bo, uint32_t esgs_ring_size,
- struct radeon_winsys_bo *gsvs_ring_bo, uint32_t gsvs_ring_size)
+radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *esgs_ring_bo,
+ uint32_t esgs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo, uint32_t gsvs_ring_size)
{
if (!esgs_ring_bo && !gsvs_ring_bo)
return;
}
static void
-radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs,
- struct radeon_winsys_bo *tess_rings_bo)
+radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *tess_rings_bo)
{
uint64_t tf_va;
uint32_t tf_ring_size;
radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(tf_va >> 40));
}
- radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM,
- device->physical_device->hs.hs_offchip_param);
+ radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, device->physical_device->hs.hs_offchip_param);
} else {
radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size));
radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, tf_va >> 8);
- radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
- device->physical_device->hs.hs_offchip_param);
+ radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, device->physical_device->hs.hs_offchip_param);
}
}
static VkResult
-radv_initialise_task_control_buffer(struct radv_device *device,
- struct radeon_winsys_bo *task_rings_bo)
+radv_initialise_task_control_buffer(struct radv_device *device, struct radeon_winsys_bo *task_rings_bo)
{
uint32_t *ptr = (uint32_t *)device->ws->buffer_map(task_rings_bo);
if (!ptr)
}
static void
-radv_emit_task_rings(struct radv_device *device, struct radeon_cmdbuf *cs,
- struct radeon_winsys_bo *task_rings_bo, bool compute)
+radv_emit_task_rings(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *task_rings_bo,
+ bool compute)
{
if (!task_rings_bo)
return;
}
static void
-radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs,
- uint32_t size_per_wave, uint32_t waves,
+radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves,
struct radeon_winsys_bo *scratch_bo)
{
const struct radeon_info *info = &device->physical_device->rad_info;
radeon_emit(cs, va >> 8); /* SPI_GFX_SCRATCH_BASE_LO */
radeon_emit(cs, va >> 40); /* SPI_GFX_SCRATCH_BASE_HI */
} else {
- radeon_set_context_reg(
- cs, R_0286E8_SPI_TMPRING_SIZE,
- S_0286E8_WAVES(waves) | S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 1024)));
+ radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
+ S_0286E8_WAVES(waves) | S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 1024)));
}
}
static void
-radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs,
- uint32_t size_per_wave, uint32_t waves,
+radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves,
struct radeon_winsys_bo *compute_scratch_bo)
{
const struct radeon_info *info = &device->physical_device->rad_info;
radeon_set_sh_reg(
cs, R_00B860_COMPUTE_TMPRING_SIZE,
- S_00B860_WAVES(waves) |
- S_00B860_WAVESIZE(round_up_u32(size_per_wave, info->gfx_level >= GFX11 ? 256 : 1024)));
+ S_00B860_WAVES(waves) | S_00B860_WAVESIZE(round_up_u32(size_per_wave, info->gfx_level >= GFX11 ? 256 : 1024)));
}
static void
}
} else if (device->physical_device->rad_info.gfx_level >= GFX10) {
uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
- R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
- R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
+ R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
radv_emit_shader_pointer(device, cs, regs[i], va, true);
}
} else if (device->physical_device->rad_info.gfx_level == GFX9) {
uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
- R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
- R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
+ R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
radv_emit_shader_pointer(device, cs, regs[i], va, true);
}
static void
-radv_emit_attribute_ring(struct radv_device *device, struct radeon_cmdbuf *cs,
- struct radeon_winsys_bo *attr_ring_bo, uint32_t attr_ring_size)
+radv_emit_attribute_ring(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *attr_ring_bo,
+ uint32_t attr_ring_size)
{
const struct radv_physical_device *pdevice = device->physical_device;
uint64_t va;
* bottom-of-pipe EOP event, but increment the PWS counter instead of writing memory.
*/
radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 6, 0));
- radeon_emit(cs, S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_490_EVENT_INDEX(5) |
- S_490_PWS_ENABLE(1));
+ radeon_emit(cs, S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_490_EVENT_INDEX(5) | S_490_PWS_ENABLE(1));
radeon_emit(cs, 0); /* DST_SEL, INT_SEL, DATA_SEL */
radeon_emit(cs, 0); /* ADDRESS_LO */
radeon_emit(cs, 0); /* ADDRESS_HI */
/* Wait for the PWS counter. */
radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 6, 0));
- radeon_emit(cs, S_580_PWS_STAGE_SEL(V_580_CP_ME) | S_580_PWS_COUNTER_SEL(V_580_TS_SELECT) |
- S_580_PWS_ENA2(1) | S_580_PWS_COUNT(0));
+ radeon_emit(cs, S_580_PWS_STAGE_SEL(V_580_CP_ME) | S_580_PWS_COUNTER_SEL(V_580_TS_SELECT) | S_580_PWS_ENA2(1) |
+ S_580_PWS_COUNT(0));
radeon_emit(cs, 0xffffffff); /* GCR_SIZE */
radeon_emit(cs, 0x01ffffff); /* GCR_SIZE_HI */
radeon_emit(cs, 0); /* GCR_BASE_LO */
/* The PS will read inputs from this address. */
radeon_set_uconfig_reg(cs, R_031118_SPI_ATTRIBUTE_RING_BASE, va >> 16);
- radeon_set_uconfig_reg(
- cs, R_03111C_SPI_ATTRIBUTE_RING_SIZE,
- S_03111C_MEM_SIZE(((attr_ring_size / pdevice->rad_info.max_se) >> 16) - 1) |
- S_03111C_BIG_PAGE(pdevice->rad_info.discardable_allows_big_page) | S_03111C_L1_POLICY(1));
+ radeon_set_uconfig_reg(cs, R_03111C_SPI_ATTRIBUTE_RING_SIZE,
+ S_03111C_MEM_SIZE(((attr_ring_size / pdevice->rad_info.max_se) >> 16) - 1) |
+ S_03111C_BIG_PAGE(pdevice->rad_info.discardable_allows_big_page) | S_03111C_L1_POLICY(1));
}
static void
const bool add_sample_positions = !queue->ring_info.sample_positions && needs->sample_positions;
const uint32_t scratch_size = needs->scratch_size_per_wave * needs->scratch_waves;
- const uint32_t queue_scratch_size =
- queue->ring_info.scratch_size_per_wave * queue->ring_info.scratch_waves;
+ const uint32_t queue_scratch_size = queue->ring_info.scratch_size_per_wave * queue->ring_info.scratch_waves;
if (scratch_size > queue_scratch_size) {
- result = ws->buffer_create(ws, scratch_size, 4096, RADEON_DOMAIN_VRAM, ring_bo_flags,
- RADV_BO_PRIORITY_SCRATCH, 0, &scratch_bo);
+ result = ws->buffer_create(ws, scratch_size, 4096, RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0,
+ &scratch_bo);
if (result != VK_SUCCESS)
goto fail;
radv_rmv_log_command_buffer_bo_create(device, scratch_bo, 0, 0, scratch_size);
}
- const uint32_t compute_scratch_size =
- needs->compute_scratch_size_per_wave * needs->compute_scratch_waves;
+ const uint32_t compute_scratch_size = needs->compute_scratch_size_per_wave * needs->compute_scratch_waves;
const uint32_t compute_queue_scratch_size =
queue->ring_info.compute_scratch_size_per_wave * queue->ring_info.compute_scratch_waves;
if (compute_scratch_size > compute_queue_scratch_size) {
}
if (!queue->ring_info.tess_rings && needs->tess_rings) {
- uint64_t tess_rings_size = device->physical_device->hs.tess_offchip_ring_offset +
- device->physical_device->hs.tess_offchip_ring_size;
- result = ws->buffer_create(ws, tess_rings_size, 256, RADEON_DOMAIN_VRAM, ring_bo_flags,
- RADV_BO_PRIORITY_SCRATCH, 0, &tess_rings_bo);
+ uint64_t tess_rings_size =
+ device->physical_device->hs.tess_offchip_ring_offset + device->physical_device->hs.tess_offchip_ring_size;
+ result = ws->buffer_create(ws, tess_rings_size, 256, RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH,
+ 0, &tess_rings_bo);
if (result != VK_SUCCESS)
goto fail;
radv_rmv_log_command_buffer_bo_create(device, tess_rings_bo, 0, 0, tess_rings_size);
uint32_t task_rings_bo_flags =
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM;
- result = ws->buffer_create(ws, device->physical_device->task_info.bo_size_bytes, 256,
- RADEON_DOMAIN_VRAM, task_rings_bo_flags, RADV_BO_PRIORITY_SCRATCH,
- 0, &task_rings_bo);
+ result = ws->buffer_create(ws, device->physical_device->task_info.bo_size_bytes, 256, RADEON_DOMAIN_VRAM,
+ task_rings_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &task_rings_bo);
if (result != VK_SUCCESS)
goto fail;
radv_rmv_log_command_buffer_bo_create(device, task_rings_bo, 0, 0,
if (!queue->ring_info.mesh_scratch_ring && needs->mesh_scratch_ring) {
assert(device->physical_device->rad_info.gfx_level >= GFX10_3);
- result = ws->buffer_create(ws, RADV_MESH_SCRATCH_NUM_ENTRIES * RADV_MESH_SCRATCH_ENTRY_BYTES,
- 256, RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH,
- 0, &mesh_scratch_ring_bo);
+ result = ws->buffer_create(ws, RADV_MESH_SCRATCH_NUM_ENTRIES * RADV_MESH_SCRATCH_ENTRY_BYTES, 256,
+ RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &mesh_scratch_ring_bo);
if (result != VK_SUCCESS)
goto fail;
- radv_rmv_log_command_buffer_bo_create(
- device, mesh_scratch_ring_bo, 0, 0,
- RADV_MESH_SCRATCH_NUM_ENTRIES * RADV_MESH_SCRATCH_ENTRY_BYTES);
+ radv_rmv_log_command_buffer_bo_create(device, mesh_scratch_ring_bo, 0, 0,
+ RADV_MESH_SCRATCH_NUM_ENTRIES * RADV_MESH_SCRATCH_ENTRY_BYTES);
}
if (needs->attr_ring_size > queue->ring_info.attr_ring_size) {
assert(device->physical_device->rad_info.gfx_level >= GFX11);
- result = ws->buffer_create(ws, needs->attr_ring_size, 2 * 1024 * 1024 /* 2MiB */,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_32BIT | RADEON_FLAG_DISCARDABLE | ring_bo_flags,
- RADV_BO_PRIORITY_SCRATCH, 0, &attr_ring_bo);
+ result = ws->buffer_create(ws, needs->attr_ring_size, 2 * 1024 * 1024 /* 2MiB */, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_32BIT | RADEON_FLAG_DISCARDABLE | ring_bo_flags, RADV_BO_PRIORITY_SCRATCH,
+ 0, &attr_ring_bo);
if (result != VK_SUCCESS)
goto fail;
radv_rmv_log_command_buffer_bo_create(device, attr_ring_bo, 0, 0, needs->attr_ring_size);
/* 4 streamout GDS counters.
* We need 256B (64 dw) of GDS, otherwise streamout hangs.
*/
- result = ws->buffer_create(ws, 256, 4, RADEON_DOMAIN_GDS, ring_bo_flags,
- RADV_BO_PRIORITY_SCRATCH, 0, &gds_bo);
+ result = ws->buffer_create(ws, 256, 4, RADEON_DOMAIN_GDS, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &gds_bo);
if (result != VK_SUCCESS)
goto fail;
if (!queue->ring_info.gds_oa && needs->gds_oa) {
assert(device->physical_device->rad_info.gfx_level >= GFX10);
- result = ws->buffer_create(ws, 4, 1, RADEON_DOMAIN_OA, ring_bo_flags,
- RADV_BO_PRIORITY_SCRATCH, 0, &gds_oa_bo);
+ result = ws->buffer_create(ws, 4, 1, RADEON_DOMAIN_OA, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &gds_oa_bo);
if (result != VK_SUCCESS)
goto fail;
* when it uses the task shader rings. The task rings BO is shared between the
* GFX and compute queues and already initialized here.
*/
- if ((queue->qf == RADV_QUEUE_COMPUTE && !descriptor_bo && task_rings_bo) ||
- scratch_bo != queue->scratch_bo || esgs_ring_bo != queue->esgs_ring_bo ||
- gsvs_ring_bo != queue->gsvs_ring_bo || tess_rings_bo != queue->tess_rings_bo ||
- task_rings_bo != queue->task_rings_bo ||
+ if ((queue->qf == RADV_QUEUE_COMPUTE && !descriptor_bo && task_rings_bo) || scratch_bo != queue->scratch_bo ||
+ esgs_ring_bo != queue->esgs_ring_bo || gsvs_ring_bo != queue->gsvs_ring_bo ||
+ tess_rings_bo != queue->tess_rings_bo || task_rings_bo != queue->task_rings_bo ||
mesh_scratch_ring_bo != queue->mesh_scratch_ring_bo || attr_ring_bo != queue->attr_ring_bo ||
add_sample_positions) {
uint32_t size = 0;
- if (gsvs_ring_bo || esgs_ring_bo || tess_rings_bo || task_rings_bo || mesh_scratch_ring_bo ||
- attr_ring_bo || add_sample_positions) {
+ if (gsvs_ring_bo || esgs_ring_bo || tess_rings_bo || task_rings_bo || mesh_scratch_ring_bo || attr_ring_bo ||
+ add_sample_positions) {
size = 176; /* 2 dword + 2 padding + 4 dword * 10 */
if (add_sample_positions)
size += 128; /* 64+32+16+8 = 120 bytes */
size = 8; /* 2 dword */
}
- result = ws->buffer_create(
- ws, size, 4096, RADEON_DOMAIN_VRAM,
- RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY,
- RADV_BO_PRIORITY_DESCRIPTOR, 0, &descriptor_bo);
+ result = ws->buffer_create(ws, size, 4096, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY,
+ RADV_BO_PRIORITY_DESCRIPTOR, 0, &descriptor_bo);
if (result != VK_SUCCESS)
goto fail;
}
map[1] = rsrc1;
}
- if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || task_rings_bo || mesh_scratch_ring_bo ||
- attr_ring_bo || add_sample_positions)
- radv_fill_shader_rings(device, map, add_sample_positions, needs->esgs_ring_size,
- esgs_ring_bo, needs->gsvs_ring_size, gsvs_ring_bo, tess_rings_bo,
- task_rings_bo, mesh_scratch_ring_bo, needs->attr_ring_size,
- attr_ring_bo);
+ if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || task_rings_bo || mesh_scratch_ring_bo || attr_ring_bo ||
+ add_sample_positions)
+ radv_fill_shader_rings(device, map, add_sample_positions, needs->esgs_ring_size, esgs_ring_bo,
+ needs->gsvs_ring_size, gsvs_ring_bo, tess_rings_bo, task_rings_bo, mesh_scratch_ring_bo,
+ needs->attr_ring_size, attr_ring_bo);
ws->buffer_unmap(descriptor_bo);
}
radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
}
- radv_emit_gs_ring_sizes(device, cs, esgs_ring_bo, needs->esgs_ring_size, gsvs_ring_bo,
- needs->gsvs_ring_size);
+ radv_emit_gs_ring_sizes(device, cs, esgs_ring_bo, needs->esgs_ring_size, gsvs_ring_bo, needs->gsvs_ring_size);
radv_emit_tess_factor_ring(device, cs, tess_rings_bo);
radv_emit_task_rings(device, cs, task_rings_bo, false);
radv_emit_attribute_ring(device, cs, attr_ring_bo, needs->attr_ring_size);
radv_emit_graphics_shader_pointers(device, cs, descriptor_bo);
- radv_emit_compute_scratch(device, cs, needs->compute_scratch_size_per_wave,
- needs->compute_scratch_waves, compute_scratch_bo);
- radv_emit_graphics_scratch(device, cs, needs->scratch_size_per_wave, needs->scratch_waves,
- scratch_bo);
+ radv_emit_compute_scratch(device, cs, needs->compute_scratch_size_per_wave, needs->compute_scratch_waves,
+ compute_scratch_bo);
+ radv_emit_graphics_scratch(device, cs, needs->scratch_size_per_wave, needs->scratch_waves, scratch_bo);
break;
case RADV_QUEUE_COMPUTE:
radv_init_compute_state(cs, device);
radv_emit_task_rings(device, cs, task_rings_bo, true);
radv_emit_compute_shader_pointers(device, cs, descriptor_bo);
- radv_emit_compute_scratch(device, cs, needs->compute_scratch_size_per_wave,
- needs->compute_scratch_waves, compute_scratch_bo);
+ radv_emit_compute_scratch(device, cs, needs->compute_scratch_size_per_wave, needs->compute_scratch_waves,
+ compute_scratch_bo);
break;
default:
break;
flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
}
- si_cs_emit_cache_flush(ws, cs, gfx_level, NULL, 0, is_mec, flush_bits, &sqtt_flush_bits,
- 0);
+ si_cs_emit_cache_flush(ws, cs, gfx_level, NULL, 0, is_mec, flush_bits, &sqtt_flush_bits, 0);
}
result = ws->cs_finalize(cs);
static VkResult
radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device,
- struct vk_command_buffer *const *cmd_buffers, uint32_t cmd_buffer_count,
- bool *use_perf_counters, bool *has_follower)
+ struct vk_command_buffer *const *cmd_buffers, uint32_t cmd_buffer_count, bool *use_perf_counters,
+ bool *has_follower)
{
if (queue->qf != RADV_QUEUE_GENERAL && queue->qf != RADV_QUEUE_COMPUTE)
return VK_SUCCESS;
for (uint32_t j = 0; j < cmd_buffer_count; j++) {
struct radv_cmd_buffer *cmd_buffer = container_of(cmd_buffers[j], struct radv_cmd_buffer, vk);
- needs.scratch_size_per_wave =
- MAX2(needs.scratch_size_per_wave, cmd_buffer->scratch_size_per_wave_needed);
+ needs.scratch_size_per_wave = MAX2(needs.scratch_size_per_wave, cmd_buffer->scratch_size_per_wave_needed);
needs.scratch_waves = MAX2(needs.scratch_waves, cmd_buffer->scratch_waves_wanted);
- needs.compute_scratch_size_per_wave = MAX2(needs.compute_scratch_size_per_wave,
- cmd_buffer->compute_scratch_size_per_wave_needed);
- needs.compute_scratch_waves =
- MAX2(needs.compute_scratch_waves, cmd_buffer->compute_scratch_waves_wanted);
+ needs.compute_scratch_size_per_wave =
+ MAX2(needs.compute_scratch_size_per_wave, cmd_buffer->compute_scratch_size_per_wave_needed);
+ needs.compute_scratch_waves = MAX2(needs.compute_scratch_waves, cmd_buffer->compute_scratch_waves_wanted);
needs.esgs_ring_size = MAX2(needs.esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
needs.gsvs_ring_size = MAX2(needs.gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
needs.tess_rings |= cmd_buffer->tess_rings_needed;
}
/* Sanitize scratch size information. */
- needs.scratch_waves = needs.scratch_size_per_wave
- ? MIN2(needs.scratch_waves, UINT32_MAX / needs.scratch_size_per_wave)
- : 0;
+ needs.scratch_waves =
+ needs.scratch_size_per_wave ? MIN2(needs.scratch_waves, UINT32_MAX / needs.scratch_size_per_wave) : 0;
needs.compute_scratch_waves =
needs.compute_scratch_size_per_wave
? MIN2(needs.compute_scratch_waves, UINT32_MAX / needs.compute_scratch_size_per_wave)
: 0;
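The clamp above is an overflow guard: limiting waves to UINT32_MAX / size_per_wave guarantees the product size_per_wave * waves fits in 32 bits (and zero size yields zero waves). A minimal self-contained sketch of the same guard, with invented values:
/* Hedged sketch of the scratch sanitization above; values are invented. */
#include <stdint.h>
#include <stdio.h>
#define MIN2(a, b) ((a) < (b) ? (a) : (b))
int main(void)
{
   uint32_t size_per_wave = 1u << 20; /* 1 MiB per wave (assumed) */
   uint32_t waves = 8192;             /* requested wave count (assumed) */
   waves = size_per_wave ? MIN2(waves, UINT32_MAX / size_per_wave) : 0;
   printf("clamped waves = %u, total = %llu bytes\n", waves,
          (unsigned long long)size_per_wave * waves);
   return 0;
}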
if (device->physical_device->rad_info.gfx_level >= GFX11 && queue->qf == RADV_QUEUE_GENERAL) {
- needs.attr_ring_size = device->physical_device->rad_info.attribute_ring_size_per_se *
- device->physical_device->rad_info.max_se;
+ needs.attr_ring_size =
+ device->physical_device->rad_info.attribute_ring_size_per_se * device->physical_device->rad_info.max_se;
}
/* Return early if we already match these needs.
* Note that it's not possible for any of the needed values to be less
* than what the queue already had, because we only ever increase the allocated size.
*/
- if (queue->initial_full_flush_preamble_cs &&
- queue->ring_info.scratch_size_per_wave == needs.scratch_size_per_wave &&
+ if (queue->initial_full_flush_preamble_cs && queue->ring_info.scratch_size_per_wave == needs.scratch_size_per_wave &&
queue->ring_info.scratch_waves == needs.scratch_waves &&
queue->ring_info.compute_scratch_size_per_wave == needs.compute_scratch_size_per_wave &&
queue->ring_info.compute_scratch_waves == needs.compute_scratch_waves &&
queue->ring_info.esgs_ring_size == needs.esgs_ring_size &&
- queue->ring_info.gsvs_ring_size == needs.gsvs_ring_size &&
- queue->ring_info.tess_rings == needs.tess_rings &&
+ queue->ring_info.gsvs_ring_size == needs.gsvs_ring_size && queue->ring_info.tess_rings == needs.tess_rings &&
queue->ring_info.task_rings == needs.task_rings &&
queue->ring_info.mesh_scratch_ring == needs.mesh_scratch_ring &&
- queue->ring_info.attr_ring_size == needs.attr_ring_size &&
- queue->ring_info.gds == needs.gds && queue->ring_info.gds_oa == needs.gds_oa &&
- queue->ring_info.sample_positions == needs.sample_positions)
+ queue->ring_info.attr_ring_size == needs.attr_ring_size && queue->ring_info.gds == needs.gds &&
+ queue->ring_info.gds_oa == needs.gds_oa && queue->ring_info.sample_positions == needs.sample_positions)
return VK_SUCCESS;
return radv_update_preamble_cs(queue, device, &needs);
VkResult r = VK_SUCCESS;
struct radv_device *device = queue->device;
struct radeon_winsys *ws = device->ws;
- const enum amd_ip_type leader_ip =
- radv_queue_family_to_ring(device->physical_device, queue->state.qf);
+ const enum amd_ip_type leader_ip = radv_queue_family_to_ring(device->physical_device, queue->state.qf);
struct radeon_winsys_bo *gang_sem_bo = NULL;
/* Gang semaphores BO.
* DWORD 0: used in preambles, gang leader writes, gang members wait.
* DWORD 1: used in postambles, gang leader waits, gang members write.
*/
- r = ws->buffer_create(ws, 8, 4, RADEON_DOMAIN_VRAM,
- RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
+ r = ws->buffer_create(ws, 8, 4, RADEON_DOMAIN_VRAM, RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
RADV_BO_PRIORITY_SCRATCH, 0, &gang_sem_bo);
if (r != VK_SUCCESS)
return r;
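The gang semaphore comment above describes a two-DWORD handshake: the leader writes DWORD 0 in the preamble and the members wait on it, while the members write DWORD 1 in the postamble and the leader waits on it. A host-side analogue using C11 atomics — a sketch only; the driver implements this with WRITE_DATA and wait-mem packets, not CPU atomics:
/* Hedged host-side analogue of the gang semaphore handshake. */
#include <stdatomic.h>
#include <stdint.h>
struct gang_sem {
   _Atomic uint32_t dw0; /* preamble: leader writes 1, members wait then reset */
   _Atomic uint32_t dw1; /* postamble: members write 1, leader waits then resets */
};
static void leader_preamble(struct gang_sem *s)  { atomic_store(&s->dw0, 1); }
static void member_preamble(struct gang_sem *s)
{
   while (atomic_load(&s->dw0) < 1)
      ; /* analogue of waiting GREATER_OR_EQUAL 1 */
   atomic_store(&s->dw0, 0); /* reset for the next submission */
}
static void member_postamble(struct gang_sem *s) { atomic_store(&s->dw1, 1); }
static void leader_postamble(struct gang_sem *s)
{
   while (atomic_load(&s->dw1) < 1)
      ;
   atomic_store(&s->dw1, 0);
}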
*/
radv_cp_wait_mem(ace_pre_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, ace_wait_va, 1, 0xffffffff);
radeon_emit(ace_pre_cs, PKT3(PKT3_WRITE_DATA, 3, 0));
- radeon_emit(ace_pre_cs,
- S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
+ radeon_emit(ace_pre_cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
radeon_emit(ace_pre_cs, ace_wait_va);
radeon_emit(ace_pre_cs, ace_wait_va >> 32);
radeon_emit(ace_pre_cs, 0);
radeon_emit(leader_pre_cs, PKT3(PKT3_WRITE_DATA, 3, 0));
- radeon_emit(leader_pre_cs,
- S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
+ radeon_emit(leader_pre_cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
radeon_emit(leader_pre_cs, ace_wait_va);
radeon_emit(leader_pre_cs, ace_wait_va >> 32);
radeon_emit(leader_pre_cs, 1);
*/
radv_cp_wait_mem(leader_post_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, leader_wait_va, 1, 0xffffffff);
radeon_emit(leader_post_cs, PKT3(PKT3_WRITE_DATA, 3, 0));
- radeon_emit(leader_post_cs,
- S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
+ radeon_emit(leader_post_cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
radeon_emit(leader_post_cs, leader_wait_va);
radeon_emit(leader_post_cs, leader_wait_va >> 32);
radeon_emit(leader_post_cs, 0);
radeon_emit(ace_post_cs, PKT3(PKT3_WRITE_DATA, 3, 0));
- radeon_emit(ace_post_cs,
- S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
+ radeon_emit(ace_post_cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
radeon_emit(ace_post_cs, leader_wait_va);
radeon_emit(ace_post_cs, leader_wait_va >> 32);
radeon_emit(ace_post_cs, 1);
uint64_t set_va = va + (unlock ? 0 : 8 * pass);
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
- COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_COUNT_SEL |
+ COPY_DATA_WR_CONFIRM);
radeon_emit(cs, 0); /* immediate */
radeon_emit(cs, 0);
radeon_emit(cs, unset_va);
radeon_emit(cs, unset_va >> 32);
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
- COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_COUNT_SEL |
+ COPY_DATA_WR_CONFIRM);
radeon_emit(cs, 1); /* immediate */
radeon_emit(cs, 0);
radeon_emit(cs, set_va);
uint64_t mutex_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_LOCK_OFFSET;
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
- COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_COUNT_SEL |
+ COPY_DATA_WR_CONFIRM);
radeon_emit(cs, 0); /* immediate */
radeon_emit(cs, 0);
radeon_emit(cs, mutex_va);
}
memcpy(new_waits, submission->waits, sizeof(struct vk_sync_wait) * submission->wait_count);
- radv_get_shader_upload_sync_wait(queue->device, shader_upload_seq,
- &new_waits[submission->wait_count]);
+ radv_get_shader_upload_sync_wait(queue->device, shader_upload_seq, &new_waits[submission->wait_count]);
waits = new_waits;
wait_count += 1;
initial_preambles[num_initial_preambles++] = queue->state.gang_wait_preamble_cs;
initial_preambles[num_initial_preambles++] = queue->follower_state->gang_wait_preamble_cs;
initial_preambles[num_initial_preambles++] =
- need_wait ? queue->follower_state->initial_full_flush_preamble_cs
- : queue->follower_state->initial_preamble_cs;
+ need_wait ? queue->follower_state->initial_full_flush_preamble_cs : queue->follower_state->initial_preamble_cs;
continue_preambles[num_continue_preambles++] = queue->state.gang_wait_preamble_cs;
- continue_preambles[num_continue_preambles++] =
- queue->follower_state->gang_wait_preamble_cs;
- continue_preambles[num_continue_preambles++] =
- queue->follower_state->continue_preamble_cs;
+ continue_preambles[num_continue_preambles++] = queue->follower_state->gang_wait_preamble_cs;
+ continue_preambles[num_continue_preambles++] = queue->follower_state->continue_preamble_cs;
postambles[num_postambles++] = queue->follower_state->gang_wait_postamble_cs;
postambles[num_postambles++] = queue->state.gang_wait_postamble_cs;
/* Add CS from submitted command buffers. */
for (unsigned c = 0; c < advance; ++c) {
- struct radv_cmd_buffer *cmd_buffer =
- (struct radv_cmd_buffer *)submission->command_buffers[j + c];
+ struct radv_cmd_buffer *cmd_buffer = (struct radv_cmd_buffer *)submission->command_buffers[j + c];
assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
- const bool can_chain_next =
- !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT);
+ const bool can_chain_next = !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT);
/* Follower needs to be first because the last CS must match the queue's IP type. */
if (radv_cmd_buffer_has_follower(cmd_buffer)) {
queue->device->ws->cs_unchain(cmd_buffer->gang.cs);
- if (!chainable_ace ||
- !queue->device->ws->cs_chain(chainable_ace, cmd_buffer->gang.cs, false))
+ if (!chainable_ace || !queue->device->ws->cs_chain(chainable_ace, cmd_buffer->gang.cs, false))
cs_array[num_submitted_cs++] = cmd_buffer->gang.cs;
chainable_ace = can_chain_next ? cmd_buffer->gang.cs : NULL;
}
queue->device->ws->cs_unchain(cmd_buffer->cs);
- if (!chainable ||
- !queue->device->ws->cs_chain(chainable, cmd_buffer->cs, queue->state.uses_shadow_regs))
+ if (!chainable || !queue->device->ws->cs_chain(chainable, cmd_buffer->cs, queue->state.uses_shadow_regs))
cs_array[num_submitted_cs++] = cmd_buffer->cs;
chainable = can_chain_next ? cmd_buffer->cs : NULL;
submit.cs_count = num_submitted_cs;
submit.initial_preamble_count = submit_ace ? num_initial_preambles : num_1q_initial_preambles;
- submit.continue_preamble_count =
- submit_ace ? num_continue_preambles : num_1q_continue_preambles;
+ submit.continue_preamble_count = submit_ace ? num_continue_preambles : num_1q_continue_preambles;
submit.postamble_count = submit_ace ? num_postambles : num_1q_postambles;
result = queue->device->ws->cs_submit(ctx, &submit, j == 0 ? wait_count : 0, waits,
- last_submit ? submission->signal_count : 0,
- submission->signals);
+ last_submit ? submission->signal_count : 0, submission->signals);
if (result != VK_SUCCESS)
goto fail;
initial_preambles[1] = !use_ace ? NULL : queue->follower_state->initial_preamble_cs;
}
- queue->last_shader_upload_seq =
- MAX2(queue->last_shader_upload_seq, shader_upload_seq);
+ queue->last_shader_upload_seq = MAX2(queue->last_shader_upload_seq, shader_upload_seq);
fail:
free(cs_array);
if (result != VK_SUCCESS)
return result;
- queue->state.uses_shadow_regs =
- device->uses_shadow_regs && queue->state.qf == RADV_QUEUE_GENERAL;
+ queue->state.uses_shadow_regs = device->uses_shadow_regs && queue->state.qf == RADV_QUEUE_GENERAL;
if (queue->state.uses_shadow_regs) {
result = radv_create_shadow_regs_preamble(device, &queue->state);
if (result != VK_SUCCESS)
/* These are uint64_t to tell the compiler that buf can't alias them.
* If they're uint32_t the generated code needs to redundantly
* store and reload them between buf writes. */
- uint64_t cdw; /* Number of used dwords. */
- uint64_t max_dw; /* Maximum number of dwords. */
+ uint64_t cdw;         /* Number of used dwords. */
+ uint64_t max_dw;      /* Maximum number of dwords. */
uint64_t reserved_dw; /* Number of dwords reserved through radeon_check_space() */
- uint32_t *buf; /* The base pointer of the chunk. */
+ uint32_t *buf;        /* The base pointer of the chunk. */
};
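The comment on this struct carries a real reasoning step: because cdw and max_dw are uint64_t, strict aliasing says stores through the uint32_t *buf pointer cannot legally modify them, so the compiler may keep the counters in registers across consecutive buffer writes instead of reloading them. A minimal sketch of the emit pattern this enables:
/* Hedged sketch: with cdw as uint64_t, the store through buf cannot alias
 * it, so no reload of cdw is forced between back-to-back emits. */
#include <stdint.h>
struct cmdbuf {
   uint64_t cdw;    /* number of used dwords */
   uint64_t max_dw; /* capacity in dwords */
   uint32_t *buf;   /* base pointer of the chunk */
};
static inline void emit(struct cmdbuf *cs, uint32_t value)
{
   cs->buf[cs->cdw++] = value;
}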
#define RADEON_SURF_TYPE_MASK 0xFF
#define RADEON_SURF_MODE_MASK 0xFF
#define RADEON_SURF_MODE_SHIFT 8
-#define RADEON_SURF_GET(v, field) \
- (((v) >> RADEON_SURF_##field##_SHIFT) & RADEON_SURF_##field##_MASK)
+#define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_##field##_SHIFT) & RADEON_SURF_##field##_MASK)
#define RADEON_SURF_SET(v, field) (((v)&RADEON_SURF_##field##_MASK) << RADEON_SURF_##field##_SHIFT)
-#define RADEON_SURF_CLR(v, field) \
- ((v) & ~(RADEON_SURF_##field##_MASK << RADEON_SURF_##field##_SHIFT))
+#define RADEON_SURF_CLR(v, field) ((v) & ~(RADEON_SURF_##field##_MASK << RADEON_SURF_##field##_SHIFT))
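The three field macros reflowed above implement a shift-and-mask bitfield scheme keyed on the RADEON_SURF_*_MASK/_SHIFT pairs. A small usage sketch, reproducing only the MODE field (mask 0xFF, shift 8) from this excerpt:
/* Hedged usage sketch for the RADEON_SURF_* field macros shown above. */
#include <assert.h>
#include <stdint.h>
#define RADEON_SURF_MODE_MASK 0xFF
#define RADEON_SURF_MODE_SHIFT 8
#define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_##field##_SHIFT) & RADEON_SURF_##field##_MASK)
#define RADEON_SURF_SET(v, field) (((v)&RADEON_SURF_##field##_MASK) << RADEON_SURF_##field##_SHIFT)
#define RADEON_SURF_CLR(v, field) ((v) & ~(RADEON_SURF_##field##_MASK << RADEON_SURF_##field##_SHIFT))
int main(void)
{
   uint64_t flags = 0;
   flags = RADEON_SURF_CLR(flags, MODE) | RADEON_SURF_SET(2, MODE); /* store mode 2 */
   assert(RADEON_SURF_GET(flags, MODE) == 2);
   return 0;
}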
enum radeon_bo_layout {
RADEON_LAYOUT_LINEAR = 0,
uint64_t (*query_value)(struct radeon_winsys *ws, enum radeon_value_id value);
- bool (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset, unsigned num_registers,
- uint32_t *out);
+ bool (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset, unsigned num_registers, uint32_t *out);
const char *(*get_chip_name)(struct radeon_winsys *ws);
- VkResult (*buffer_create)(struct radeon_winsys *ws, uint64_t size, unsigned alignment,
- enum radeon_bo_domain domain, enum radeon_bo_flag flags,
- unsigned priority, uint64_t address, struct radeon_winsys_bo **out_bo);
+ VkResult (*buffer_create)(struct radeon_winsys *ws, uint64_t size, unsigned alignment, enum radeon_bo_domain domain,
+ enum radeon_bo_flag flags, unsigned priority, uint64_t address,
+ struct radeon_winsys_bo **out_bo);
void (*buffer_destroy)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo);
void *(*buffer_map)(struct radeon_winsys_bo *bo);
- VkResult (*buffer_from_ptr)(struct radeon_winsys *ws, void *pointer, uint64_t size,
- unsigned priority, struct radeon_winsys_bo **out_bo);
+ VkResult (*buffer_from_ptr)(struct radeon_winsys *ws, void *pointer, uint64_t size, unsigned priority,
+ struct radeon_winsys_bo **out_bo);
- VkResult (*buffer_from_fd)(struct radeon_winsys *ws, int fd, unsigned priority,
- struct radeon_winsys_bo **out_bo, uint64_t *alloc_size);
+ VkResult (*buffer_from_fd)(struct radeon_winsys *ws, int fd, unsigned priority, struct radeon_winsys_bo **out_bo,
+ uint64_t *alloc_size);
bool (*buffer_get_fd)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo, int *fd);
- bool (*buffer_get_flags_from_fd)(struct radeon_winsys *ws, int fd,
- enum radeon_bo_domain *domains, enum radeon_bo_flag *flags);
+ bool (*buffer_get_flags_from_fd)(struct radeon_winsys *ws, int fd, enum radeon_bo_domain *domains,
+ enum radeon_bo_flag *flags);
void (*buffer_unmap)(struct radeon_winsys_bo *bo);
- void (*buffer_set_metadata)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo,
- struct radeon_bo_metadata *md);
- void (*buffer_get_metadata)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo,
- struct radeon_bo_metadata *md);
+ void (*buffer_set_metadata)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo, struct radeon_bo_metadata *md);
+ void (*buffer_get_metadata)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo, struct radeon_bo_metadata *md);
- VkResult (*buffer_virtual_bind)(struct radeon_winsys *ws, struct radeon_winsys_bo *parent,
- uint64_t offset, uint64_t size, struct radeon_winsys_bo *bo,
- uint64_t bo_offset);
+ VkResult (*buffer_virtual_bind)(struct radeon_winsys *ws, struct radeon_winsys_bo *parent, uint64_t offset,
+ uint64_t size, struct radeon_winsys_bo *bo, uint64_t bo_offset);
- VkResult (*buffer_make_resident)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo,
- bool resident);
+ VkResult (*buffer_make_resident)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo, bool resident);
- VkResult (*ctx_create)(struct radeon_winsys *ws, enum radeon_ctx_priority priority,
- struct radeon_winsys_ctx **ctx);
+ VkResult (*ctx_create)(struct radeon_winsys *ws, enum radeon_ctx_priority priority, struct radeon_winsys_ctx **ctx);
void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);
bool (*ctx_wait_idle)(struct radeon_winsys_ctx *ctx, enum amd_ip_type amd_ip_type, int ring_index);
enum radeon_bo_domain (*cs_domain)(const struct radeon_winsys *ws);
- struct radeon_cmdbuf *(*cs_create)(struct radeon_winsys *ws, enum amd_ip_type amd_ip_type,
- bool is_secondary);
+ struct radeon_cmdbuf *(*cs_create)(struct radeon_winsys *ws, enum amd_ip_type amd_ip_type, bool is_secondary);
void (*cs_destroy)(struct radeon_cmdbuf *cs);
void (*cs_grow)(struct radeon_cmdbuf *cs, size_t min_size);
- VkResult (*cs_submit)(struct radeon_winsys_ctx *ctx,
- const struct radv_winsys_submit_info *submit, uint32_t wait_count,
- const struct vk_sync_wait *waits, uint32_t signal_count,
+ VkResult (*cs_submit)(struct radeon_winsys_ctx *ctx, const struct radv_winsys_submit_info *submit,
+ uint32_t wait_count, const struct vk_sync_wait *waits, uint32_t signal_count,
const struct vk_sync_signal *signals);
void (*cs_add_buffer)(struct radeon_cmdbuf *cs, struct radeon_winsys_bo *bo);
- void (*cs_execute_secondary)(struct radeon_cmdbuf *parent, struct radeon_cmdbuf *child,
- bool allow_ib2);
+ void (*cs_execute_secondary)(struct radeon_cmdbuf *parent, struct radeon_cmdbuf *child, bool allow_ib2);
void (*cs_dump)(struct radeon_cmdbuf *cs, FILE *file, const int *trace_ids, int trace_id_count);
void (*dump_bo_log)(struct radeon_winsys *ws, FILE *file);
- int (*surface_init)(struct radeon_winsys *ws, const struct ac_surf_info *surf_info,
- struct radeon_surf *surf);
+ int (*surface_init)(struct radeon_winsys *ws, const struct ac_surf_info *surf_info, struct radeon_surf *surf);
int (*get_fd)(struct radeon_winsys *ws);
open_event_file(const char *event_name, const char *event_filename, const char *mode)
{
char filename[2048];
- snprintf(filename, sizeof(filename), RADV_FTRACE_INSTANCE_PATH "/events/amdgpu/%s/%s",
- event_name, event_filename);
+ snprintf(filename, sizeof(filename), RADV_FTRACE_INSTANCE_PATH "/events/amdgpu/%s/%s", event_name, event_filename);
return fopen(filename, mode);
}
*dst_fd = -1;
#else
char filename[2048];
- snprintf(filename, sizeof(filename), RADV_FTRACE_INSTANCE_PATH "/per_cpu/cpu%d/trace_pipe_raw",
- cpu_index);
+ snprintf(filename, sizeof(filename), RADV_FTRACE_INSTANCE_PATH "/per_cpu/cpu%d/trace_pipe_raw", cpu_index);
/* I/O to the pipe needs to be non-blocking, otherwise reading all available
* data would block indefinitely by waiting for more data to be written to the pipe */
*dst_fd = open(filename, O_RDONLY | O_NONBLOCK);
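As the comment above notes, the trace pipe is opened O_NONBLOCK so a reader can drain whatever is buffered and stop instead of blocking for more data; on a non-blocking fd, read() returns -1 with errno set to EAGAIN once the pipe is empty. A self-contained sketch of that drain loop — the helper name is invented:
/* Hedged sketch: draining a non-blocking fd without ever blocking. */
#include <errno.h>
#include <stdint.h>
#include <sys/types.h>
#include <unistd.h>
static ssize_t drain_fd(int fd, uint8_t *buf, size_t buf_size)
{
   ssize_t total = 0;
   for (;;) {
      ssize_t n = read(fd, buf + total, buf_size - (size_t)total);
      if (n > 0) {
         total += n;
         if ((size_t)total == buf_size)
            break; /* buffer full; caller processes and calls again */
      } else if (n < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
         break; /* pipe drained; a blocking fd would stall here */
      } else {
         break; /* EOF or hard error */
      }
   }
   return total;
}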
int32_t commit;
};
-enum trace_event_type {
- TRACE_EVENT_TYPE_PADDING = 29,
- TRACE_EVENT_TYPE_EXTENDED_DELTA,
- TRACE_EVENT_TYPE_TIMESTAMP
-};
+enum trace_event_type { TRACE_EVENT_TYPE_PADDING = 29, TRACE_EVENT_TYPE_EXTENDED_DELTA, TRACE_EVENT_TYPE_TIMESTAMP };
struct trace_event_header {
uint32_t type_len : 5;
static void
emit_page_table_update_event(struct vk_memory_trace_data *data, bool is_apu, uint64_t timestamp,
- struct trace_event_amdgpu_vm_update_ptes *event, uint64_t *addrs,
- unsigned int pte_index)
+ struct trace_event_amdgpu_vm_update_ptes *event, uint64_t *addrs, unsigned int pte_index)
{
struct vk_rmv_token token;
token.timestamp = timestamp;
token.data.page_table_update.type = VK_RMV_PAGE_TABLE_UPDATE_TYPE_UPDATE;
token.data.page_table_update.page_size = event->incr;
- token.data.page_table_update.page_count =
- (end_addr - start_addr) * MIN_GPU_PAGE_SIZE / event->incr;
+ token.data.page_table_update.page_count = (end_addr - start_addr) * MIN_GPU_PAGE_SIZE / event->incr;
token.data.page_table_update.pid = event->common.pid;
- token.data.page_table_update.virtual_address =
- event->start * MIN_GPU_PAGE_SIZE + pte_index * event->incr;
+ token.data.page_table_update.virtual_address = event->start * MIN_GPU_PAGE_SIZE + pte_index * event->incr;
/* RMV expects mappings to system memory to have a physical address of 0.
* Even with traces generated by AMDGPU-PRO, on APUs without dedicated VRAM everything seems to
* be marked as "committed to system memory". */
- token.data.page_table_update.physical_address =
- event->flags & AMDGPU_PTE_SYSTEM || is_apu ? 0 : addrs[pte_index];
+ token.data.page_table_update.physical_address = event->flags & AMDGPU_PTE_SYSTEM || is_apu ? 0 : addrs[pte_index];
token.data.page_table_update.is_unmap = !(event->flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT));
util_dynarray_append(&data->tokens, struct vk_rmv_token, token);
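The token arithmetic above converts the kernel event's page-table units into RMV terms: page_count spreads the MIN_GPU_PAGE_SIZE-granular range over the PTE increment, and the virtual address advances by incr per PTE. A worked micro-example, assuming MIN_GPU_PAGE_SIZE is 4096 — the event values here are invented:
/* Hedged worked example of the page-table token arithmetic above. */
#include <stdint.h>
#include <stdio.h>
#define MIN_GPU_PAGE_SIZE 4096
int main(void)
{
   uint64_t start = 0x10000, end_addr = 0x10040; /* in MIN_GPU_PAGE_SIZE units */
   uint64_t incr = 65536;                        /* bytes covered per PTE (64 KiB) */
   uint64_t pte_index = 3;
   /* 0x40 pages * 4096 B / 65536 B per PTE = 4 updated entries */
   uint64_t page_count = (end_addr - start) * MIN_GPU_PAGE_SIZE / incr;
   uint64_t va = start * MIN_GPU_PAGE_SIZE + pte_index * incr;
   printf("pages=%llu va=0x%llx\n", (unsigned long long)page_count,
          (unsigned long long)va);
   return 0;
}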
struct trace_event_address_array *array = (struct trace_event_address_array *)(event + 1);
for (uint32_t i = 0; i < event->num_ptes; ++i)
- emit_page_table_update_event(&device->vk.memory_trace_data,
- !device->physical_device->rad_info.has_dedicated_vram, timestamp,
- event, (uint64_t *)array->data, i);
+ emit_page_table_update_event(&device->vk.memory_trace_data, !device->physical_device->rad_info.has_dedicated_vram,
+ timestamp, event, (uint64_t *)array->data, i);
}
static void
switch (index) {
case VK_RMV_MEMORY_LOCATION_DEVICE:
out_info->physical_base_address = 0;
- out_info->size = info->all_vram_visible ? (uint64_t)info->vram_size_kb * 1024ULL
- : (uint64_t)info->vram_vis_size_kb * 1024ULL;
+ out_info->size =
+ info->all_vram_visible ? (uint64_t)info->vram_size_kb * 1024ULL : (uint64_t)info->vram_vis_size_kb * 1024ULL;
break;
case VK_RMV_MEMORY_LOCATION_DEVICE_INVISIBLE:
out_info->physical_base_address = (uint64_t)info->vram_vis_size_kb * 1024ULL;
/* The token lock must be held when entering _locked functions */
static void
-log_resource_bind_locked(struct radv_device *device, uint64_t resource, struct radeon_winsys_bo *bo,
- uint64_t offset, uint64_t size)
+log_resource_bind_locked(struct radv_device *device, uint64_t resource, struct radeon_winsys_bo *bo, uint64_t offset,
+ uint64_t size)
{
struct vk_rmv_resource_bind_token token = {0};
token.address = bo->va + offset;
}
void
-radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo, uint32_t size,
- bool is_internal)
+radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo, uint32_t size, bool is_internal)
{
if (!device->vk.memory_trace_data.is_enabled)
return;
struct vk_rmv_virtual_allocate_token token = {0};
token.address = bo->va;
/* If all VRAM is visible, no bo will be in invisible memory. */
- token.is_in_invisible_vram =
- bo->vram_no_cpu_access && !device->physical_device->rad_info.all_vram_visible;
+ token.is_in_invisible_vram = bo->vram_no_cpu_access && !device->physical_device->rad_info.all_vram_visible;
token.preferred_domains = (enum vk_rmv_kernel_memory_domain)bo->initial_domain;
token.is_driver_internal = is_internal;
token.page_count = DIV_ROUND_UP(size, 4096);
}
void
-radv_rmv_log_image_create(struct radv_device *device, const VkImageCreateInfo *create_info,
- bool is_internal, VkImage _image)
+radv_rmv_log_image_create(struct radv_device *device, const VkImageCreateInfo *create_info, bool is_internal,
+ VkImage _image)
{
if (!device->vk.memory_trace_data.is_enabled)
return;
RADV_FROM_HANDLE(radv_image, image, _image);
simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
- log_resource_bind_locked(device, (uint64_t)_image, image->bindings[0].bo,
- image->bindings[0].offset, image->size);
+ log_resource_bind_locked(device, (uint64_t)_image, image->bindings[0].bo, image->bindings[0].offset, image->size);
simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}
create_token.query_pool.type = pool->type;
create_token.query_pool.has_cpu_access = true;
- vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE,
- &create_token);
+ vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
log_resource_bind_locked(device, (uint64_t)_pool, pool->bo, 0, pool->size);
simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}
void
-radv_rmv_log_command_buffer_bo_create(struct radv_device *device, struct radeon_winsys_bo *bo,
- uint32_t executable_size, uint32_t data_size,
- uint32_t scratch_size)
+radv_rmv_log_command_buffer_bo_create(struct radv_device *device, struct radeon_winsys_bo *bo, uint32_t executable_size,
+ uint32_t data_size, uint32_t scratch_size)
{
if (!device->vk.memory_trace_data.is_enabled)
return;
simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
struct vk_rmv_resource_create_token create_token = {0};
create_token.is_driver_internal = true;
- create_token.resource_id =
- vk_rmv_get_resource_id_locked(&device->vk, upload_resource_identifier);
+ create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, upload_resource_identifier);
create_token.type = VK_RMV_RESOURCE_TYPE_COMMAND_ALLOCATOR;
- create_token.command_buffer.preferred_domain =
- (enum vk_rmv_kernel_memory_domain)device->ws->cs_domain(device->ws);
+ create_token.command_buffer.preferred_domain = (enum vk_rmv_kernel_memory_domain)device->ws->cs_domain(device->ws);
create_token.command_buffer.executable_size = executable_size;
create_token.command_buffer.app_available_executable_size = executable_size;
create_token.command_buffer.embedded_data_size = data_size;
create_token.command_buffer.scratch_size = scratch_size;
create_token.command_buffer.app_available_scratch_size = scratch_size;
- vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE,
- &create_token);
+ vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
log_resource_bind_locked(device, upload_resource_identifier, bo, 0, size);
simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
vk_rmv_log_cpu_map(&device->vk, bo->va, false);
struct vk_rmv_resource_destroy_token destroy_token = {0};
destroy_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)(uintptr_t)bo);
- vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY,
- &destroy_token);
+ vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY, &destroy_token);
vk_rmv_destroy_resource_id_locked(&device->vk, (uint64_t)(uintptr_t)bo);
simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
radv_rmv_log_bo_destroy(device, bo);
bind_token.resource_id = resource_id;
bind_token.size = RADV_BORDER_COLOR_BUFFER_SIZE;
- vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE,
- &create_token);
+ vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_BIND, &bind_token);
simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
vk_rmv_log_cpu_map(&device->vk, bo->va, false);
}
void
-radv_rmv_log_sparse_add_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo,
- uint64_t offset)
+radv_rmv_log_sparse_add_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, uint64_t offset)
{
if (!device->vk.memory_trace_data.is_enabled)
return;
}
void
-radv_rmv_log_sparse_remove_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo,
- uint64_t offset)
+radv_rmv_log_sparse_remove_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, uint64_t offset)
{
if (!device->vk.memory_trace_data.is_enabled)
return;
}
void
-radv_rmv_log_descriptor_pool_create(struct radv_device *device,
- const VkDescriptorPoolCreateInfo *create_info,
+radv_rmv_log_descriptor_pool_create(struct radv_device *device, const VkDescriptorPoolCreateInfo *create_info,
VkDescriptorPool _pool, bool is_internal)
{
if (!device->vk.memory_trace_data.is_enabled)
create_token.descriptor_pool.max_sets = create_info->maxSets;
create_token.descriptor_pool.pool_size_count = create_info->poolSizeCount;
/* Using vk_rmv_token_pool_alloc frees the allocation automatically when the trace is done. */
- create_token.descriptor_pool.pool_sizes =
- malloc(create_info->poolSizeCount * sizeof(VkDescriptorPoolSize));
+ create_token.descriptor_pool.pool_sizes = malloc(create_info->poolSizeCount * sizeof(VkDescriptorPoolSize));
if (!create_token.descriptor_pool.pool_sizes)
return;
memcpy(create_token.descriptor_pool.pool_sizes, create_info->pPoolSizes,
create_info->poolSizeCount * sizeof(VkDescriptorPoolSize));
- vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE,
- &create_token);
+ vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
if (pool->bo) {
bind_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pool);
bind_token.size = pool->size;
- vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_BIND,
- &bind_token);
+ vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_BIND, &bind_token);
simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}
}
create_token.pipeline.is_ngg = graphics_pipeline->is_ngg;
create_token.pipeline.shader_stages = graphics_pipeline->active_stages;
- vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE,
- &create_token);
+ vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
struct radv_shader *shader = pipeline->shaders[s];
if (!shader)
continue;
- log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset,
- shader->alloc->size);
+ log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset, shader->alloc->size);
}
simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}
VkPipeline _pipeline = radv_pipeline_to_handle(pipeline);
- VkShaderStageFlagBits active_stages = pipeline->type == RADV_PIPELINE_COMPUTE
- ? VK_SHADER_STAGE_COMPUTE_BIT
- : VK_SHADER_STAGE_RAYGEN_BIT_KHR;
+ VkShaderStageFlagBits active_stages =
+ pipeline->type == RADV_PIPELINE_COMPUTE ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_RAYGEN_BIT_KHR;
simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
struct vk_rmv_resource_create_token create_token = {0};
create_token.pipeline.is_ngg = false;
create_token.pipeline.shader_stages = active_stages;
- vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE,
- &create_token);
+ vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
struct radv_shader *shader = pipeline->shaders[vk_to_mesa_shader_stage(active_stages)];
- log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset,
- shader->alloc->size);
+ log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset, shader->alloc->size);
simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}
void
-radv_rmv_log_event_create(struct radv_device *device, VkEvent _event, VkEventCreateFlags flags,
- bool is_internal)
+radv_rmv_log_event_create(struct radv_device *device, VkEvent _event, VkEventCreateFlags flags, bool is_internal)
{
if (!device->vk.memory_trace_data.is_enabled)
return;
create_token.event.flags = flags;
create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_event);
- vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE,
- &create_token);
+ vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
log_resource_bind_locked(device, (uint64_t)_event, event->bo, 0, 8);
simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
uint64_t unused;
};
-static_assert(sizeof(struct rra_file_chunk_description) == 64,
- "rra_file_chunk_description does not match RRA spec");
+static_assert(sizeof(struct rra_file_chunk_description) == 64, "rra_file_chunk_description does not match RRA spec");
static uint64_t
node_to_addr(uint64_t node)
}
static void
-rra_dump_chunk_description(uint64_t offset, uint64_t header_size, uint64_t data_size,
- const char *name, enum rra_chunk_type type, FILE *output)
+rra_dump_chunk_description(uint64_t offset, uint64_t header_size, uint64_t data_size, const char *name,
+ enum rra_chunk_type type, FILE *output)
{
struct rra_file_chunk_description chunk = {
.type = type,
.rev_id = rad_info->pci_rev_id,
};
- strncpy(asic_info.device_name,
- rad_info->marketing_name ? rad_info->marketing_name : rad_info->name,
+ strncpy(asic_info.device_name, rad_info->marketing_name ? rad_info->marketing_name : rad_info->name,
RRA_FILE_DEVICE_NAME_MAX_SIZE - 1);
fwrite(&asic_info, sizeof(struct rra_asic_info), 1, output);
#define RRA_ROOT_NODE_OFFSET align(sizeof(struct rra_accel_struct_header), 64)
-static_assert(sizeof(struct rra_accel_struct_header) == 120,
- "rra_accel_struct_header does not match RRA spec");
+static_assert(sizeof(struct rra_accel_struct_header) == 120, "rra_accel_struct_header does not match RRA spec");
struct rra_accel_struct_metadata {
uint64_t virtual_address;
char unused[116];
};
-static_assert(sizeof(struct rra_accel_struct_metadata) == 128,
- "rra_accel_struct_metadata does not match RRA spec");
+static_assert(sizeof(struct rra_accel_struct_metadata) == 128, "rra_accel_struct_metadata does not match RRA spec");
struct rra_geometry_info {
uint32_t primitive_count : 29;
static_assert(sizeof(struct rra_geometry_info) == 12, "rra_geometry_info does not match RRA spec");
static struct rra_accel_struct_header
-rra_fill_accel_struct_header_common(struct radv_accel_struct_header *header,
- size_t parent_id_table_size, size_t leaf_node_data_size,
- size_t internal_node_data_size, uint64_t primitive_count)
+rra_fill_accel_struct_header_common(struct radv_accel_struct_header *header, size_t parent_id_table_size,
+ size_t leaf_node_data_size, size_t internal_node_data_size,
+ uint64_t primitive_count)
{
struct rra_accel_struct_header result = {
.post_build_info =
};
result.metadata_size = sizeof(struct rra_accel_struct_metadata) + parent_id_table_size;
- result.file_size = result.metadata_size + sizeof(struct rra_accel_struct_header) +
- internal_node_data_size + leaf_node_data_size;
+ result.file_size =
+ result.metadata_size + sizeof(struct rra_accel_struct_header) + internal_node_data_size + leaf_node_data_size;
result.internal_nodes_offset = sizeof(struct rra_accel_struct_metadata);
result.leaf_nodes_offset = result.internal_nodes_offset + internal_node_data_size;
float otw_matrix[12];
};
-static_assert(sizeof(struct rra_instance_node) == 128,
- "rra_instance_node does not match RRA spec!");
+static_assert(sizeof(struct rra_instance_node) == 128, "rra_instance_node does not match RRA spec!");
/*
* Format RRA uses for aabb nodes
static_assert(sizeof(struct rra_triangle_node) == 64, "rra_triangle_node does not match RRA spec!");
static void
-rra_dump_tlas_header(struct radv_accel_struct_header *header, size_t parent_id_table_size,
- size_t leaf_node_data_size, size_t internal_node_data_size,
- uint64_t primitive_count, FILE *output)
+rra_dump_tlas_header(struct radv_accel_struct_header *header, size_t parent_id_table_size, size_t leaf_node_data_size,
+ size_t internal_node_data_size, uint64_t primitive_count, FILE *output)
{
struct rra_accel_struct_header file_header = rra_fill_accel_struct_header_common(
header, parent_id_table_size, leaf_node_data_size, internal_node_data_size, primitive_count);
static void
rra_dump_blas_header(struct radv_accel_struct_header *header, size_t parent_id_table_size,
- struct radv_accel_struct_geometry_info *geometry_infos,
- size_t leaf_node_data_size, size_t internal_node_data_size,
- uint64_t primitive_count, FILE *output)
+ struct radv_accel_struct_geometry_info *geometry_infos, size_t leaf_node_data_size,
+ size_t internal_node_data_size, uint64_t primitive_count, FILE *output)
{
struct rra_accel_struct_header file_header = rra_fill_accel_struct_header_common(
header, parent_id_table_size, leaf_node_data_size, internal_node_data_size, primitive_count);
file_header.post_build_info.bvh_type = RRA_BVH_TYPE_BLAS;
- file_header.geometry_type =
- header->geometry_count ? geometry_infos->type : VK_GEOMETRY_TYPE_TRIANGLES_KHR;
+ file_header.geometry_type = header->geometry_count ? geometry_infos->type : VK_GEOMETRY_TYPE_TRIANGLES_KHR;
fwrite(&file_header, sizeof(struct rra_accel_struct_header), 1, output);
}
char location[31];
};
-static void PRINTFLIKE(2, 3)
-rra_validation_fail(struct rra_validation_context *ctx, const char *message, ...)
+static void PRINTFLIKE(2, 3) rra_validation_fail(struct rra_validation_context *ctx, const char *message, ...)
{
if (!ctx->failed) {
fprintf(stderr, "radv: rra: Validation failed at %s:\n", ctx->location);
}
static bool
-rra_validate_header(struct radv_rra_accel_struct_data *accel_struct,
- const struct radv_accel_struct_header *header)
+rra_validate_header(struct radv_rra_accel_struct_data *accel_struct, const struct radv_accel_struct_header *header)
{
struct rra_validation_context ctx = {
.location = "header",
};
- if (accel_struct->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR &&
- header->instance_count > 0)
+ if (accel_struct->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR && header->instance_count > 0)
rra_validation_fail(&ctx, "BLAS contains instances");
if (header->bvh_offset >= accel_struct->size)
};
static bool
-rra_validate_node(struct hash_table_u64 *accel_struct_vas, uint8_t *data, void *node,
- uint32_t geometry_count, uint32_t size, bool is_bottom_level)
+rra_validate_node(struct hash_table_u64 *accel_struct_vas, uint8_t *data, void *node, uint32_t geometry_count,
+ uint32_t size, bool is_bottom_level)
{
struct rra_validation_context ctx = {0};
if (!is_internal_node(type) && is_bottom_level == (type == radv_bvh_node_instance))
rra_validation_fail(&ctx,
- is_bottom_level ? "%s node in BLAS (child index %u)"
- : "%s node in TLAS (child index %u)",
+ is_bottom_level ? "%s node in BLAS (child index %u)" : "%s node in TLAS (child index %u)",
node_type_names[type], i);
if (offset > size) {
}
struct rra_validation_context child_ctx = {0};
- snprintf(child_ctx.location, sizeof(child_ctx.location), "%s node (offset=%u)",
- node_type_names[type], offset);
+ snprintf(child_ctx.location, sizeof(child_ctx.location), "%s node (offset=%u)", node_type_names[type], offset);
if (is_internal_node(type)) {
- ctx.failed |= rra_validate_node(accel_struct_vas, data, data + offset, geometry_count,
- size, is_bottom_level);
+ ctx.failed |= rra_validate_node(accel_struct_vas, data, data + offset, geometry_count, size, is_bottom_level);
} else if (type == radv_bvh_node_instance) {
struct radv_bvh_instance_node *src = (struct radv_bvh_instance_node *)(data + offset);
uint64_t blas_va = node_to_addr(src->bvh_ptr) - src->bvh_offset;
};
static void
-rra_transcode_triangle_node(struct rra_transcoding_context *ctx,
- const struct radv_bvh_triangle_node *src)
+rra_transcode_triangle_node(struct rra_transcoding_context *ctx, const struct radv_bvh_triangle_node *src)
{
struct rra_triangle_node *dst = (struct rra_triangle_node *)(ctx->dst + ctx->dst_leaf_offset);
ctx->dst_leaf_offset += sizeof(struct rra_triangle_node);
}
static void
-rra_transcode_aabb_node(struct rra_transcoding_context *ctx, const struct radv_bvh_aabb_node *src,
- radv_aabb bounds)
+rra_transcode_aabb_node(struct rra_transcoding_context *ctx, const struct radv_bvh_aabb_node *src, radv_aabb bounds)
{
struct rra_aabb_node *dst = (struct rra_aabb_node *)(ctx->dst + ctx->dst_leaf_offset);
ctx->dst_leaf_offset += sizeof(struct rra_aabb_node);
}
static void
-rra_transcode_instance_node(struct rra_transcoding_context *ctx,
- const struct radv_bvh_instance_node *src)
+rra_transcode_instance_node(struct rra_transcoding_context *ctx, const struct radv_bvh_instance_node *src)
{
uint64_t blas_va = node_to_addr(src->bvh_ptr) - src->bvh_offset;
memcpy(dst->otw_matrix, src->otw_matrix.values, sizeof(dst->otw_matrix));
}
-static uint32_t rra_transcode_node(struct rra_transcoding_context *ctx, uint32_t parent_id,
- uint32_t src_id, radv_aabb bounds);
+static uint32_t rra_transcode_node(struct rra_transcoding_context *ctx, uint32_t parent_id, uint32_t src_id,
+ radv_aabb bounds);
static void
rra_transcode_box16_node(struct rra_transcoding_context *ctx, const struct radv_bvh_box16_node *src)
},
};
- dst->children[i] =
- rra_transcode_node(ctx, radv_bvh_node_box16 | (dst_offset >> 3), src->children[i], bounds);
+ dst->children[i] = rra_transcode_node(ctx, radv_bvh_node_box16 | (dst_offset >> 3), src->children[i], bounds);
}
}
continue;
}
- dst->children[i] = rra_transcode_node(ctx, radv_bvh_node_box32 | (dst_offset >> 3),
- src->children[i], src->coords[i]);
+ dst->children[i] =
+ rra_transcode_node(ctx, radv_bvh_node_box32 | (dst_offset >> 3), src->children[i], src->coords[i]);
}
}
}
static uint32_t
-rra_transcode_node(struct rra_transcoding_context *ctx, uint32_t parent_id, uint32_t src_id,
- radv_aabb bounds)
+rra_transcode_node(struct rra_transcoding_context *ctx, uint32_t parent_id, uint32_t src_id, radv_aabb bounds)
{
uint32_t node_type = src_id & 7;
uint32_t src_offset = (src_id & (~7u)) << 3;
rra_transcode_instance_node(ctx, src_child_node);
}
- uint32_t parent_id_index =
- rra_parent_table_index_from_offset(dst_offset, ctx->parent_id_table_size);
+ uint32_t parent_id_index = rra_parent_table_index_from_offset(dst_offset, ctx->parent_id_table_size);
ctx->parent_id_table[parent_id_index] = parent_id;
uint32_t dst_id = node_type | (dst_offset >> 3);
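/* Editor's note (hedged sketch): like RADV's own ids, an RRA node id addresses
 * the BVH in 8-byte units with the node type in the low three bits. Because
 * nodes are at least 64-byte aligned, the two encodings round-trip cleanly:
 *
 *   uint32_t make_id(uint32_t type, uint32_t offset) { return type | (offset >> 3); }
 *   uint32_t id_offset(uint32_t id)                  { return (id & ~7u) << 3; }
 *
 * e.g. type 5 at offset 128 gives id 0x15, and 0x15 maps back to offset 128. */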
static VkResult
rra_dump_acceleration_structure(struct radv_rra_accel_struct_data *accel_struct, uint8_t *data,
- struct hash_table_u64 *accel_struct_vas, bool should_validate,
- FILE *output)
+ struct hash_table_u64 *accel_struct_vas, bool should_validate, FILE *output)
{
struct radv_accel_struct_header *header = (struct radv_accel_struct_header *)data;
if (rra_validate_header(accel_struct, header)) {
return VK_ERROR_VALIDATION_FAILED_EXT;
}
- if (rra_validate_node(accel_struct_vas, data + header->bvh_offset,
- data + header->bvh_offset + src_root_offset, header->geometry_count,
- accel_struct->size, !is_tlas)) {
+ if (rra_validate_node(accel_struct_vas, data + header->bvh_offset, data + header->bvh_offset + src_root_offset,
+ header->geometry_count, accel_struct->size, !is_tlas)) {
return VK_ERROR_VALIDATION_FAILED_EXT;
}
}
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto exit;
}
- dst_structure_data =
- calloc(RRA_ROOT_NODE_OFFSET + bvh_info.internal_nodes_size + bvh_info.leaf_nodes_size, 1);
+ dst_structure_data = calloc(RRA_ROOT_NODE_OFFSET + bvh_info.internal_nodes_size + bvh_info.leaf_nodes_size, 1);
if (!dst_structure_data) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto exit;
struct rra_accel_struct_metadata rra_metadata = {
.virtual_address = va,
- .byte_size = bvh_info.leaf_nodes_size + bvh_info.internal_nodes_size +
- sizeof(struct rra_accel_struct_header),
+ .byte_size = bvh_info.leaf_nodes_size + bvh_info.internal_nodes_size + sizeof(struct rra_accel_struct_header),
};
fwrite(&chunk_header, sizeof(struct rra_accel_struct_chunk_header), 1, output);
fwrite(node_parent_table, 1, node_parent_table_size, output);
if (is_tlas)
- rra_dump_tlas_header(header, node_parent_table_size, bvh_info.leaf_nodes_size,
- bvh_info.internal_nodes_size, primitive_count, output);
+ rra_dump_tlas_header(header, node_parent_table_size, bvh_info.leaf_nodes_size, bvh_info.internal_nodes_size,
+ primitive_count, output);
else
rra_dump_blas_header(header, node_parent_table_size, geometry_infos, bvh_info.leaf_nodes_size,
bvh_info.internal_nodes_size, primitive_count, output);
/* Write acceleration structure data */
- fwrite(dst_structure_data + RRA_ROOT_NODE_OFFSET, 1,
- bvh_info.internal_nodes_size + bvh_info.leaf_nodes_size, output);
+ fwrite(dst_structure_data + RRA_ROOT_NODE_OFFSET, 1, bvh_info.internal_nodes_size + bvh_info.leaf_nodes_size,
+ output);
if (!is_tlas)
fwrite(rra_geometry_infos, sizeof(struct rra_geometry_info), header->geometry_count, output);
device->rra_trace.elapsed_frames = 0;
device->rra_trace.trigger_file = radv_rra_trace_trigger_file();
device->rra_trace.validate_as = debug_get_bool_option("RADV_RRA_TRACE_VALIDATE", false);
- device->rra_trace.copy_after_build =
- debug_get_bool_option("RADV_RRA_TRACE_COPY_AFTER_BUILD", false);
+ device->rra_trace.copy_after_build = debug_get_bool_option("RADV_RRA_TRACE_COPY_AFTER_BUILD", false);
device->rra_trace.accel_structs = _mesa_pointer_hash_table_create(NULL);
device->rra_trace.accel_struct_vas = _mesa_hash_table_u64_create(NULL);
simple_mtx_init(&device->rra_trace.data_mtx, mtx_plain);
- device->rra_trace.copy_memory_index =
- radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
- VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
+ device->rra_trace.copy_memory_index = radv_find_memory_index(
+ device->physical_device,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
}
void
if (result != VK_SUCCESS)
goto fail_buffer;
- result =
- vk_common_MapMemory(ctx->device, ctx->memory, 0, VK_WHOLE_SIZE, 0, (void **)&ctx->mapped_data);
+ result = vk_common_MapMemory(ctx->device, ctx->memory, 0, VK_WHOLE_SIZE, 0, (void **)&ctx->mapped_data);
if (result != VK_SUCCESS)
goto fail_memory;
uint64_t written_accel_struct_count = 0;
struct hash_entry *last_entry = NULL;
- for (unsigned i = 0;
- (last_entry = _mesa_hash_table_next_entry(device->rra_trace.accel_structs, last_entry));
- ++i)
+ for (unsigned i = 0; (last_entry = _mesa_hash_table_next_entry(device->rra_trace.accel_structs, last_entry)); ++i)
hash_entries[i] = last_entry;
qsort(hash_entries, struct_count, sizeof(*hash_entries), accel_struct_entry_cmp);
continue;
accel_struct_offsets[written_accel_struct_count] = (uint64_t)ftell(file);
- result =
- rra_dump_acceleration_structure(data, mapped_data, device->rra_trace.accel_struct_vas,
- device->rra_trace.validate_as, file);
+ result = rra_dump_acceleration_structure(data, mapped_data, device->rra_trace.accel_struct_vas,
+ device->rra_trace.validate_as, file);
rra_unmap_accel_struct_data(&copy_ctx, i);
rra_copy_context_finish(&copy_ctx);
uint64_t chunk_info_offset = (uint64_t)ftell(file);
- rra_dump_chunk_description(api_info_offset, 0, 8, "ApiInfo", RADV_RRA_CHUNK_ID_ASIC_API_INFO,
- file);
+ rra_dump_chunk_description(api_info_offset, 0, 8, "ApiInfo", RADV_RRA_CHUNK_ID_ASIC_API_INFO, file);
rra_dump_chunk_description(asic_info_offset, 0, sizeof(struct rra_asic_info), "AsicInfo",
RADV_RRA_CHUNK_ID_ASIC_API_INFO, file);
else
accel_struct_size = (uint64_t)(accel_struct_offsets[i + 1] - accel_struct_offsets[i]);
- rra_dump_chunk_description(accel_struct_offsets[i],
- sizeof(struct rra_accel_struct_chunk_header), accel_struct_size,
- "RawAccelStruct", RADV_RRA_CHUNK_ID_ACCEL_STRUCT, file);
+ rra_dump_chunk_description(accel_struct_offsets[i], sizeof(struct rra_accel_struct_chunk_header),
+ accel_struct_size, "RawAccelStruct", RADV_RRA_CHUNK_ID_ACCEL_STRUCT, file);
}
uint64_t file_end = (uint64_t)ftell(file);
#include <llvm/Config/llvm-config.h>
#endif
-static nir_ssa_def *build_node_to_addr(struct radv_device *device, nir_builder *b,
- nir_ssa_def *node, bool skip_type_and);
+static nir_ssa_def *build_node_to_addr(struct radv_device *device, nir_builder *b, nir_ssa_def *node,
+ bool skip_type_and);
bool
radv_enable_rt(const struct radv_physical_device *pdevice, bool rt_pipelines)
}
void
-nir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var_indices,
- uint32_t chan_1, uint32_t chan_2)
+nir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var_indices, uint32_t chan_1,
+ uint32_t chan_2)
{
nir_ssa_def *ssa_distances = nir_load_var(b, var_distances);
nir_ssa_def *ssa_indices = nir_load_var(b, var_indices);
/* if (distances[chan_2] < distances[chan_1]) { */
- nir_push_if(
- b, nir_flt(b, nir_channel(b, ssa_distances, chan_2), nir_channel(b, ssa_distances, chan_1)));
+ nir_push_if(b, nir_flt(b, nir_channel(b, ssa_distances, chan_2), nir_channel(b, ssa_distances, chan_1)));
{
/* swap(distances[chan_2], distances[chan_1]); */
- nir_ssa_def *new_distances[4] = {nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32),
- nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32)};
- nir_ssa_def *new_indices[4] = {nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32),
- nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32)};
+ nir_ssa_def *new_distances[4] = {nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32),
+ nir_ssa_undef(b, 1, 32)};
+ nir_ssa_def *new_indices[4] = {nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32),
+ nir_ssa_undef(b, 1, 32)};
new_distances[chan_2] = nir_channel(b, ssa_distances, chan_1);
new_distances[chan_1] = nir_channel(b, ssa_distances, chan_2);
new_indices[chan_2] = nir_channel(b, ssa_indices, chan_1);
new_indices[chan_1] = nir_channel(b, ssa_indices, chan_2);
- nir_store_var(b, var_distances, nir_vec(b, new_distances, 4),
- (1u << chan_1) | (1u << chan_2));
+ nir_store_var(b, var_distances, nir_vec(b, new_distances, 4), (1u << chan_1) | (1u << chan_2));
nir_store_var(b, var_indices, nir_vec(b, new_indices, 4), (1u << chan_1) | (1u << chan_2));
}
/* } */
}
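/* Editor's note: nir_sort_hit_pair is a single compare-and-swap stage. Applied
 * over a fixed sequence of channel pairs, e.g. (0,1) (2,3) (0,2) (1,3) (1,2),
 * it forms the classic five-comparator sorting network for four elements,
 * sorting the four child-hit distances while keeping the indices in sync. */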
nir_ssa_def *
-intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node,
- nir_ssa_def *ray_tmax, nir_ssa_def *origin, nir_ssa_def *dir,
- nir_ssa_def *inv_dir)
+intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node, nir_ssa_def *ray_tmax,
+ nir_ssa_def *origin, nir_ssa_def *dir, nir_ssa_def *inv_dir)
{
const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4);
const struct glsl_type *uvec4_type = glsl_vector_type(GLSL_TYPE_UINT, 4);
nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node, false);
/* vec4 distances = vec4(INF, INF, INF, INF); */
- nir_variable *distances =
- nir_variable_create(b->shader, nir_var_shader_temp, vec4_type, "distances");
+ nir_variable *distances = nir_variable_create(b->shader, nir_var_shader_temp, vec4_type, "distances");
nir_store_var(b, distances, nir_imm_vec4(b, INFINITY, INFINITY, INFINITY, INFINITY), 0xf);
/* uvec4 child_indices = uvec4(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff); */
- nir_variable *child_indices =
- nir_variable_create(b->shader, nir_var_shader_temp, uvec4_type, "child_indices");
- nir_store_var(b, child_indices,
- nir_imm_ivec4(b, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu), 0xf);
+ nir_variable *child_indices = nir_variable_create(b->shader, nir_var_shader_temp, uvec4_type, "child_indices");
+ nir_store_var(b, child_indices, nir_imm_ivec4(b, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu), 0xf);
/* Need to remove infinities here because otherwise we get nasty NaN propagation
* if the direction has 0s in it. */
};
/* node->children[i] -> uint */
- nir_ssa_def *child_index =
- nir_build_load_global(b, 1, 32, nir_iadd_imm(b, node_addr, child_offset), .align_mul = 64,
- .align_offset = child_offset % 64);
+ nir_ssa_def *child_index = nir_build_load_global(b, 1, 32, nir_iadd_imm(b, node_addr, child_offset),
+ .align_mul = 64, .align_offset = child_offset % 64);
/* node->coords[i][0], node->coords[i][1] -> vec3 */
nir_ssa_def *node_coords[2] = {
- nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[0]),
- .align_mul = 64, .align_offset = coord_offsets[0] % 64),
- nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[1]),
- .align_mul = 64, .align_offset = coord_offsets[1] % 64),
+ nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[0]), .align_mul = 64,
+ .align_offset = coord_offsets[0] % 64),
+ nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[1]), .align_mul = 64,
+ .align_offset = coord_offsets[1] % 64),
};
/* If x of the aabb min is NaN, then this is an inactive aabb.
* https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap36.html#VkAabbPositionsKHR
*/
nir_ssa_def *min_x = nir_channel(b, node_coords[0], 0);
- nir_ssa_def *min_x_is_not_nan =
- nir_inot(b, nir_fneu(b, min_x, min_x)); /* NaN != NaN -> true */
+ nir_ssa_def *min_x_is_not_nan = nir_inot(b, nir_fneu(b, min_x, min_x)); /* NaN != NaN -> true */
/* vec3 bound0 = (node->coords[i][0] - origin) * inv_dir; */
nir_ssa_def *bound0 = nir_fmul(b, nir_fsub(b, node_coords[0], origin), inv_dir);
/* float tmin = max(max(min(bound0.x, bound1.x), min(bound0.y, bound1.y)), min(bound0.z,
* bound1.z)); */
- nir_ssa_def *tmin =
- nir_fmax(b,
- nir_fmax(b, nir_fmin(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
- nir_fmin(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
- nir_fmin(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));
+ nir_ssa_def *tmin = nir_fmax(b,
+ nir_fmax(b, nir_fmin(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
+ nir_fmin(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
+ nir_fmin(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));
/* float tmax = min(min(max(bound0.x, bound1.x), max(bound0.y, bound1.y)), max(bound0.z,
* bound1.z)); */
- nir_ssa_def *tmax =
- nir_fmin(b,
- nir_fmin(b, nir_fmax(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
- nir_fmax(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
- nir_fmax(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));
+ nir_ssa_def *tmax = nir_fmin(b,
+ nir_fmin(b, nir_fmax(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
+ nir_fmax(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
+ nir_fmax(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));
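/* Editor's worked example of the slab test above: with origin = (0, 0, 0),
 * dir = inv_dir = (1, 1, 1) and a box spanning [2, 4] on every axis,
 * bound0 = (2, 2, 2) and bound1 = (4, 4, 4), so tmin = max(2, 2, 2) = 2 and
 * tmax = min(4, 4, 4) = 4. Since tmax >= max(0, tmin), the child is accepted
 * for any ray_tmax > 2; a box entirely behind the ray would instead yield
 * tmax < 0 and be rejected. */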
/* if (!isnan(node->coords[i][0].x) && tmax >= max(0.0f, tmin) && tmin < ray_tmax) { */
- nir_push_if(b,
- nir_iand(b, min_x_is_not_nan,
- nir_iand(b, nir_fge(b, tmax, nir_fmax(b, nir_imm_float(b, 0.0f), tmin)),
- nir_flt(b, tmin, ray_tmax))));
+ nir_push_if(b, nir_iand(b, min_x_is_not_nan,
+ nir_iand(b, nir_fge(b, tmax, nir_fmax(b, nir_imm_float(b, 0.0f), tmin)),
+ nir_flt(b, tmin, ray_tmax))));
{
/* child_indices[i] = node->children[i]; */
nir_ssa_def *new_child_indices[4] = {child_index, child_index, child_index, child_index};
}
nir_ssa_def *
-intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node,
- nir_ssa_def *ray_tmax, nir_ssa_def *origin, nir_ssa_def *dir,
- nir_ssa_def *inv_dir)
+intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node, nir_ssa_def *ray_tmax,
+ nir_ssa_def *origin, nir_ssa_def *dir, nir_ssa_def *inv_dir)
{
const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4);
nir_channel(b, abs_dir, 2),
};
/* Find index of greatest value of abs_dir and put that as kz. */
- nir_ssa_def *kz = nir_bcsel(
- b, nir_fge(b, abs_dirs[0], abs_dirs[1]),
- nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[2]), nir_imm_int(b, 0), nir_imm_int(b, 2)),
- nir_bcsel(b, nir_fge(b, abs_dirs[1], abs_dirs[2]), nir_imm_int(b, 1), nir_imm_int(b, 2)));
+ nir_ssa_def *kz =
+ nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[1]),
+ nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[2]), nir_imm_int(b, 0), nir_imm_int(b, 2)),
+ nir_bcsel(b, nir_fge(b, abs_dirs[1], abs_dirs[2]), nir_imm_int(b, 1), nir_imm_int(b, 2)));
nir_ssa_def *kx = nir_imod_imm(b, nir_iadd_imm(b, kz, 1), 3);
nir_ssa_def *ky = nir_imod_imm(b, nir_iadd_imm(b, kx, 1), 3);
nir_ssa_def *k_indices[3] = {kx, ky, kz};
/* Swap kx and ky dimensions to preserve winding order */
unsigned swap_xy_swizzle[4] = {1, 0, 2, 3};
- k = nir_bcsel(b, nir_flt_imm(b, nir_vector_extract(b, dir, kz), 0.0f),
- nir_swizzle(b, k, swap_xy_swizzle, 3), k);
+ k = nir_bcsel(b, nir_flt_imm(b, nir_vector_extract(b, dir, kz), 0.0f), nir_swizzle(b, k, swap_xy_swizzle, 3), k);
kx = nir_channel(b, k, 0);
ky = nir_channel(b, k, 1);
nir_ssa_def *v_c = nir_fsub(b, node_coords[2], origin);
/* Perform shear and scale */
- nir_ssa_def *ax =
- nir_fsub(b, nir_vector_extract(b, v_a, kx), nir_fmul(b, sx, nir_vector_extract(b, v_a, kz)));
- nir_ssa_def *ay =
- nir_fsub(b, nir_vector_extract(b, v_a, ky), nir_fmul(b, sy, nir_vector_extract(b, v_a, kz)));
- nir_ssa_def *bx =
- nir_fsub(b, nir_vector_extract(b, v_b, kx), nir_fmul(b, sx, nir_vector_extract(b, v_b, kz)));
- nir_ssa_def *by =
- nir_fsub(b, nir_vector_extract(b, v_b, ky), nir_fmul(b, sy, nir_vector_extract(b, v_b, kz)));
- nir_ssa_def *cx =
- nir_fsub(b, nir_vector_extract(b, v_c, kx), nir_fmul(b, sx, nir_vector_extract(b, v_c, kz)));
- nir_ssa_def *cy =
- nir_fsub(b, nir_vector_extract(b, v_c, ky), nir_fmul(b, sy, nir_vector_extract(b, v_c, kz)));
+ nir_ssa_def *ax = nir_fsub(b, nir_vector_extract(b, v_a, kx), nir_fmul(b, sx, nir_vector_extract(b, v_a, kz)));
+ nir_ssa_def *ay = nir_fsub(b, nir_vector_extract(b, v_a, ky), nir_fmul(b, sy, nir_vector_extract(b, v_a, kz)));
+ nir_ssa_def *bx = nir_fsub(b, nir_vector_extract(b, v_b, kx), nir_fmul(b, sx, nir_vector_extract(b, v_b, kz)));
+ nir_ssa_def *by = nir_fsub(b, nir_vector_extract(b, v_b, ky), nir_fmul(b, sy, nir_vector_extract(b, v_b, kz)));
+ nir_ssa_def *cx = nir_fsub(b, nir_vector_extract(b, v_c, kx), nir_fmul(b, sx, nir_vector_extract(b, v_c, kz)));
+ nir_ssa_def *cy = nir_fsub(b, nir_vector_extract(b, v_c, ky), nir_fmul(b, sy, nir_vector_extract(b, v_c, kz)));
nir_ssa_def *u = nir_fsub(b, nir_fmul(b, cx, by), nir_fmul(b, cy, bx));
nir_ssa_def *v = nir_fsub(b, nir_fmul(b, ax, cy), nir_fmul(b, ay, cx));
nir_ssa_def *w = nir_fsub(b, nir_fmul(b, bx, ay), nir_fmul(b, by, ax));
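/* Editor's note: this follows a watertight ray/triangle test in the style of
 * Woop et al. (JCGT 2013): after the axis permutation and shear, u, v and w
 * are the 2D edge functions of the projected triangle, i.e. scaled
 * barycentrics that must all share one sign (or be zero) for a hit, and whose
 * sum is the determinant used below to recover t and the barycentrics. */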
- nir_variable *u_var =
- nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "u");
- nir_variable *v_var =
- nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "v");
- nir_variable *w_var =
- nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "w");
+ nir_variable *u_var = nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "u");
+ nir_variable *v_var = nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "v");
+ nir_variable *w_var = nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "w");
nir_store_var(b, u_var, u, 0x1);
nir_store_var(b, v_var, v, 0x1);
nir_store_var(b, w_var, w, 0x1);
* The Vulkan spec states it only needs single precision watertightness
* but we fail dEQP-VK.ray_tracing_pipeline.watertightness.closedFan2.1024 with
* failures = 1 without doing this. :( */
- nir_ssa_def *cond_retest = nir_ior(
- b, nir_ior(b, nir_feq_imm(b, u, 0.0f), nir_feq_imm(b, v, 0.0f)),
- nir_feq_imm(b, w, 0.0f));
+ nir_ssa_def *cond_retest =
+ nir_ior(b, nir_ior(b, nir_feq_imm(b, u, 0.0f), nir_feq_imm(b, v, 0.0f)), nir_feq_imm(b, w, 0.0f));
nir_push_if(b, cond_retest);
{
cx = nir_f2f64(b, cx);
cy = nir_f2f64(b, cy);
- nir_store_var(b, u_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, cx, by), nir_fmul(b, cy, bx))),
- 0x1);
- nir_store_var(b, v_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, ax, cy), nir_fmul(b, ay, cx))),
- 0x1);
- nir_store_var(b, w_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, bx, ay), nir_fmul(b, by, ax))),
- 0x1);
+ nir_store_var(b, u_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, cx, by), nir_fmul(b, cy, bx))), 0x1);
+ nir_store_var(b, v_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, ax, cy), nir_fmul(b, ay, cx))), 0x1);
+ nir_store_var(b, w_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, bx, ay), nir_fmul(b, by, ax))), 0x1);
}
nir_pop_if(b, NULL);
w = nir_load_var(b, w_var);
/* Perform edge tests. */
- nir_ssa_def *cond_back = nir_ior(
- b, nir_ior(b, nir_flt_imm(b, u, 0.0f), nir_flt_imm(b, v, 0.0f)),
- nir_flt_imm(b, w, 0.0f));
+ nir_ssa_def *cond_back =
+ nir_ior(b, nir_ior(b, nir_flt_imm(b, u, 0.0f), nir_flt_imm(b, v, 0.0f)), nir_flt_imm(b, w, 0.0f));
- nir_ssa_def *cond_front = nir_ior(
- b, nir_ior(b, nir_fgt_imm(b, u, 0.0f), nir_fgt_imm(b, v, 0.0f)),
- nir_flt(b, nir_imm_float(b, 0.0f), w));
+ nir_ssa_def *cond_front =
+ nir_ior(b, nir_ior(b, nir_fgt_imm(b, u, 0.0f), nir_fgt_imm(b, v, 0.0f)), nir_flt(b, nir_imm_float(b, 0.0f), w));
nir_ssa_def *cond = nir_inot(b, nir_iand(b, cond_back, cond_front));
nir_ssa_def *bz = nir_fmul(b, sz, nir_vector_extract(b, v_b, kz));
nir_ssa_def *cz = nir_fmul(b, sz, nir_vector_extract(b, v_c, kz));
- nir_ssa_def *t =
- nir_fadd(b, nir_fadd(b, nir_fmul(b, u, az), nir_fmul(b, v, bz)), nir_fmul(b, w, cz));
+ nir_ssa_def *t = nir_fadd(b, nir_fadd(b, nir_fmul(b, u, az), nir_fmul(b, v, bz)), nir_fmul(b, w, cz));
nir_ssa_def *t_signed = nir_fmul(b, nir_fsign(b, det), t);
}
static nir_ssa_def *
-build_node_to_addr(struct radv_device *device, nir_builder *b, nir_ssa_def *node,
- bool skip_type_and)
+build_node_to_addr(struct radv_device *device, nir_builder *b, nir_ssa_def *node, bool skip_type_and)
{
nir_ssa_def *addr = skip_type_and ? node : nir_iand_imm(b, node, ~7ull);
addr = nir_ishl_imm(b, addr, 3);
/* Assumes everything is in the top half of address space, which is true in
* GFX9+ for now. */
- return device->physical_device->rad_info.gfx_level >= GFX9
- ? nir_ior_imm(b, addr, 0xffffull << 48)
- : addr;
+ return device->physical_device->rad_info.gfx_level >= GFX9 ? nir_ior_imm(b, addr, 0xffffull << 48) : addr;
}
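/* Editor's note (hedged sketch of the packing): a node id is addr >> 3 with
 * the type in bits [2:0], so the equivalent CPU-side mapping would be
 *
 *   uint64_t addr = (node & ~7ull) << 3;
 *   addr |= 0xffffull << 48;   // GFX9+: make the address canonical again
 *
 * which is only correct while every BVH allocation sits in the upper half of
 * the GPU virtual address space, as the comment above notes. */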
nir_ssa_def *
};
for (unsigned i = 0; i < 3; ++i) {
for (unsigned j = 0; j < 3; ++j) {
- nir_ssa_def *v =
- nir_fmul(b, nir_channels(b, vec, 1 << j), nir_channels(b, matrix[i], 1 << j));
+ nir_ssa_def *v = nir_fmul(b, nir_channels(b, vec, 1 << j), nir_channels(b, matrix[i], 1 << j));
result_components[i] = (translation || j) ? nir_fadd(b, result_components[i], v) : v;
}
}
{
unsigned offset = offsetof(struct radv_bvh_instance_node, wto_matrix);
for (unsigned i = 0; i < 3; ++i) {
- out[i] = nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_addr, offset + i * 16),
- .align_mul = 64, .align_offset = offset + i * 16);
+ out[i] = nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_addr, offset + i * 16), .align_mul = 64,
+ .align_offset = offset + i * 16);
}
}
/* When a hit is opaque, the any_hit shader is skipped for that hit and the
 * hit is accepted as an actual hit. */
static nir_ssa_def *
-hit_is_opaque(nir_builder *b, nir_ssa_def *sbt_offset_and_flags,
- const struct radv_ray_flags *ray_flags, nir_ssa_def *geometry_id_and_flags)
+hit_is_opaque(nir_builder *b, nir_ssa_def *sbt_offset_and_flags, const struct radv_ray_flags *ray_flags,
+ nir_ssa_def *geometry_id_and_flags)
{
- nir_ssa_def *opaque =
- nir_uge_imm(b, nir_ior(b, geometry_id_and_flags, sbt_offset_and_flags),
- RADV_INSTANCE_FORCE_OPAQUE | RADV_INSTANCE_NO_FORCE_NOT_OPAQUE);
+ nir_ssa_def *opaque = nir_uge_imm(b, nir_ior(b, geometry_id_and_flags, sbt_offset_and_flags),
+ RADV_INSTANCE_FORCE_OPAQUE | RADV_INSTANCE_NO_FORCE_NOT_OPAQUE);
opaque = nir_bcsel(b, ray_flags->force_opaque, nir_imm_true(b), opaque);
opaque = nir_bcsel(b, ray_flags->force_not_opaque, nir_imm_false(b), opaque);
return opaque;
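/* Editor's note: assuming RADV_INSTANCE_FORCE_OPAQUE and
 * RADV_INSTANCE_NO_FORCE_NOT_OPAQUE are the two top bits of the flag words,
 * the single unsigned compare above is true exactly when both bits are set in
 * the OR of the geometry and instance flags. The two bcsels then let the
 * per-ray flags override either way, so SpvRayFlagsOpaque/NoOpaque always win
 * over whatever the BVH encodes. */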
* use the same descriptor, which avoids divergence when different rays hit different
* instances at the cost of having to use 64-bit node ids. */
const uint64_t bvh_size = 1ull << 42;
- return nir_imm_ivec4(
- b, 0, 1u << 31 /* Enable box sorting */, (bvh_size - 1) & 0xFFFFFFFFu,
- ((bvh_size - 1) >> 32) | (1u << 24 /* Return IJ for triangles */) | (1u << 31));
+ return nir_imm_ivec4(b, 0, 1u << 31 /* Enable box sorting */, (bvh_size - 1) & 0xFFFFFFFFu,
+ ((bvh_size - 1) >> 32) | (1u << 24 /* Return IJ for triangles */) | (1u << 31));
}
static void
-insert_traversal_triangle_case(struct radv_device *device, nir_builder *b,
- const struct radv_ray_traversal_args *args,
- const struct radv_ray_flags *ray_flags, nir_ssa_def *result,
- nir_ssa_def *bvh_node)
+insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args,
+ const struct radv_ray_flags *ray_flags, nir_ssa_def *result, nir_ssa_def *bvh_node)
{
if (!args->triangle_cb)
return;
nir_push_if(b, nir_flt(b, intersection.t, nir_load_deref(b, args->vars.tmax)));
{
intersection.frontface = nir_fgt_imm(b, div, 0);
- nir_ssa_def *switch_ccw = nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags),
- RADV_INSTANCE_TRIANGLE_FLIP_FACING);
+ nir_ssa_def *switch_ccw =
+ nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), RADV_INSTANCE_TRIANGLE_FLIP_FACING);
intersection.frontface = nir_ixor(b, intersection.frontface, switch_ccw);
nir_ssa_def *not_cull = ray_flags->no_skip_triangles;
nir_ssa_def *not_facing_cull =
nir_bcsel(b, intersection.frontface, ray_flags->no_cull_front, ray_flags->no_cull_back);
- not_cull =
- nir_iand(b, not_cull,
- nir_ior(b, not_facing_cull,
- nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags),
- RADV_INSTANCE_TRIANGLE_FACING_CULL_DISABLE)));
+ not_cull = nir_iand(b, not_cull,
+ nir_ior(b, not_facing_cull,
+ nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags),
+ RADV_INSTANCE_TRIANGLE_FACING_CULL_DISABLE)));
nir_push_if(b, nir_iand(b,
intersection.base.node_addr = build_node_to_addr(device, b, bvh_node, false);
nir_ssa_def *triangle_info = nir_build_load_global(
b, 2, 32,
- nir_iadd_imm(b, intersection.base.node_addr,
- offsetof(struct radv_bvh_triangle_node, triangle_id)));
+ nir_iadd_imm(b, intersection.base.node_addr, offsetof(struct radv_bvh_triangle_node, triangle_id)));
intersection.base.primitive_id = nir_channel(b, triangle_info, 0);
intersection.base.geometry_id_and_flags = nir_channel(b, triangle_info, 1);
- intersection.base.opaque =
- hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), ray_flags,
- intersection.base.geometry_id_and_flags);
+ intersection.base.opaque = hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), ray_flags,
+ intersection.base.geometry_id_and_flags);
- not_cull = nir_bcsel(b, intersection.base.opaque, ray_flags->no_cull_opaque,
- ray_flags->no_cull_no_opaque);
+ not_cull = nir_bcsel(b, intersection.base.opaque, ray_flags->no_cull_opaque, ray_flags->no_cull_no_opaque);
nir_push_if(b, not_cull);
{
nir_ssa_def *divs[2] = {div, div};
- intersection.barycentrics =
- nir_fdiv(b, nir_channels(b, result, 0xc), nir_vec(b, divs, 2));
+ intersection.barycentrics = nir_fdiv(b, nir_channels(b, result, 0xc), nir_vec(b, divs, 2));
args->triangle_cb(b, &intersection, args, ray_flags);
}
}
static void
-insert_traversal_aabb_case(struct radv_device *device, nir_builder *b,
- const struct radv_ray_traversal_args *args,
+insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args,
const struct radv_ray_flags *ray_flags, nir_ssa_def *bvh_node)
{
if (!args->aabb_cb)
struct radv_leaf_intersection intersection;
intersection.node_addr = build_node_to_addr(device, b, bvh_node, false);
nir_ssa_def *triangle_info = nir_build_load_global(
- b, 2, 32,
- nir_iadd_imm(b, intersection.node_addr, offsetof(struct radv_bvh_aabb_node, primitive_id)));
+ b, 2, 32, nir_iadd_imm(b, intersection.node_addr, offsetof(struct radv_bvh_aabb_node, primitive_id)));
intersection.primitive_id = nir_channel(b, triangle_info, 0);
intersection.geometry_id_and_flags = nir_channel(b, triangle_info, 1);
- intersection.opaque = hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags),
- ray_flags, intersection.geometry_id_and_flags);
+ intersection.opaque = hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), ray_flags,
+ intersection.geometry_id_and_flags);
- nir_ssa_def *not_cull =
- nir_bcsel(b, intersection.opaque, ray_flags->no_cull_opaque, ray_flags->no_cull_no_opaque);
+ nir_ssa_def *not_cull = nir_bcsel(b, intersection.opaque, ray_flags->no_cull_opaque, ray_flags->no_cull_no_opaque);
not_cull = nir_iand(b, not_cull, ray_flags->no_skip_aabbs);
nir_push_if(b, not_cull);
{
}
nir_ssa_def *
-radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
- const struct radv_ray_traversal_args *args)
+radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args)
{
nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete");
nir_store_var(b, incomplete, nir_imm_true(b), 0x1);
struct radv_ray_flags ray_flags = {
.force_opaque = nir_test_mask(b, args->flags, SpvRayFlagsOpaqueKHRMask),
.force_not_opaque = nir_test_mask(b, args->flags, SpvRayFlagsNoOpaqueKHRMask),
- .terminate_on_first_hit =
- nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask),
- .no_cull_front = nir_ieq_imm(
- b, nir_iand_imm(b, args->flags, SpvRayFlagsCullFrontFacingTrianglesKHRMask), 0),
- .no_cull_back =
- nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullBackFacingTrianglesKHRMask), 0),
- .no_cull_opaque =
- nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullOpaqueKHRMask), 0),
- .no_cull_no_opaque =
- nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullNoOpaqueKHRMask), 0),
- .no_skip_triangles =
- nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipTrianglesKHRMask), 0),
+ .terminate_on_first_hit = nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask),
+ .no_cull_front = nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullFrontFacingTrianglesKHRMask), 0),
+ .no_cull_back = nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullBackFacingTrianglesKHRMask), 0),
+ .no_cull_opaque = nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullOpaqueKHRMask), 0),
+ .no_cull_no_opaque = nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullNoOpaqueKHRMask), 0),
+ .no_skip_triangles = nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipTrianglesKHRMask), 0),
.no_skip_aabbs = nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipAABBsKHRMask), 0),
};
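/* Editor's note: each no_* field above precomputes "the corresponding
 * SpvRayFlags cull/skip bit is clear", so the hot loop below can combine
 * culling decisions with plain iand/bcsel (see the triangle and AABB cases)
 * instead of re-testing args->flags for every node. */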
nir_push_loop(b);
{
- nir_push_if(
- b, nir_ieq_imm(b, nir_load_deref(b, args->vars.current_node), RADV_BVH_INVALID_NODE));
+ nir_push_if(b, nir_ieq_imm(b, nir_load_deref(b, args->vars.current_node), RADV_BVH_INVALID_NODE));
{
/* Early exit if we never overflowed the stack, to avoid having to backtrack to
* the root for no reason. */
- nir_push_if(b, nir_ilt_imm(b, nir_load_deref(b, args->vars.stack),
- args->stack_base + args->stack_stride));
+ nir_push_if(b, nir_ilt_imm(b, nir_load_deref(b, args->vars.stack), args->stack_base + args->stack_stride));
{
nir_store_var(b, incomplete, nir_imm_false(b), 0x1);
nir_jump(b, nir_jump_break);
}
nir_pop_if(b, NULL);
- nir_ssa_def *stack_instance_exit = nir_ige(b, nir_load_deref(b, args->vars.top_stack),
- nir_load_deref(b, args->vars.stack));
+ nir_ssa_def *stack_instance_exit =
+ nir_ige(b, nir_load_deref(b, args->vars.top_stack), nir_load_deref(b, args->vars.stack));
nir_ssa_def *root_instance_exit =
- nir_ieq(b, nir_load_deref(b, args->vars.previous_node),
- nir_load_deref(b, args->vars.instance_bottom_node));
- nir_if *instance_exit =
- nir_push_if(b, nir_ior(b, stack_instance_exit, root_instance_exit));
+ nir_ieq(b, nir_load_deref(b, args->vars.previous_node), nir_load_deref(b, args->vars.instance_bottom_node));
+ nir_if *instance_exit = nir_push_if(b, nir_ior(b, stack_instance_exit, root_instance_exit));
instance_exit->control = nir_selection_control_dont_flatten;
{
nir_store_deref(b, args->vars.top_stack, nir_imm_int(b, -1), 1);
- nir_store_deref(b, args->vars.previous_node,
- nir_load_deref(b, args->vars.instance_top_node), 1);
- nir_store_deref(b, args->vars.instance_bottom_node,
- nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT), 1);
+ nir_store_deref(b, args->vars.previous_node, nir_load_deref(b, args->vars.instance_top_node), 1);
+ nir_store_deref(b, args->vars.instance_bottom_node, nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT), 1);
nir_store_deref(b, args->vars.bvh_base, args->root_bvh_base, 1);
nir_store_deref(b, args->vars.origin, args->origin, 7);
}
nir_pop_if(b, NULL);
- nir_push_if(b, nir_ige(b, nir_load_deref(b, args->vars.stack_low_watermark),
- nir_load_deref(b, args->vars.stack)));
+ nir_push_if(
+ b, nir_ige(b, nir_load_deref(b, args->vars.stack_low_watermark), nir_load_deref(b, args->vars.stack)));
{
nir_ssa_def *prev = nir_load_deref(b, args->vars.previous_node);
- nir_ssa_def *bvh_addr =
- build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true);
+ nir_ssa_def *bvh_addr = build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true);
nir_ssa_def *parent = fetch_parent_node(b, bvh_addr, prev);
nir_push_if(b, nir_ieq_imm(b, parent, RADV_BVH_INVALID_NODE));
}
nir_push_else(b, NULL);
{
- nir_store_deref(
- b, args->vars.stack,
- nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), -args->stack_stride), 1);
+ nir_store_deref(b, args->vars.stack,
+ nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), -args->stack_stride), 1);
nir_ssa_def *stack_ptr =
- nir_umod_imm(b, nir_load_deref(b, args->vars.stack),
- args->stack_stride * args->stack_entries);
+ nir_umod_imm(b, nir_load_deref(b, args->vars.stack), args->stack_stride * args->stack_entries);
nir_ssa_def *bvh_node = args->stack_load_cb(b, stack_ptr, args);
nir_store_deref(b, args->vars.current_node, bvh_node, 0x1);
- nir_store_deref(b, args->vars.previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE),
- 0x1);
+ nir_store_deref(b, args->vars.previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1);
}
nir_pop_if(b, NULL);
}
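/* Editor's note on the scheme above: the traversal stack is a ring of
 * stack_entries slots, so deep pushes may overwrite the oldest entries.
 * stack_low_watermark tracks the lowest stack index still backed by real
 * data; when a pop would dip below it, the code restores the lost path by
 * walking parent pointers (fetch_parent_node) instead of reading the stack. */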
nir_store_deref(b, args->vars.previous_node, bvh_node, 0x1);
nir_store_deref(b, args->vars.current_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1);
- nir_ssa_def *global_bvh_node =
- nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node));
+ nir_ssa_def *global_bvh_node = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node));
nir_ssa_def *intrinsic_result = NULL;
if (!radv_emulate_rt(device->physical_device)) {
- intrinsic_result = nir_bvh64_intersect_ray_amd(
- b, 32, desc, nir_unpack_64_2x32(b, global_bvh_node), nir_load_deref(b, args->vars.tmax),
- nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir),
- nir_load_deref(b, args->vars.inv_dir));
+ intrinsic_result =
+ nir_bvh64_intersect_ray_amd(b, 32, desc, nir_unpack_64_2x32(b, global_bvh_node),
+ nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin),
+ nir_load_deref(b, args->vars.dir), nir_load_deref(b, args->vars.inv_dir));
}
nir_ssa_def *node_type = nir_iand_imm(b, bvh_node, 7);
nir_push_else(b, NULL);
{
/* instance */
- nir_ssa_def *instance_node_addr =
- build_node_to_addr(device, b, global_bvh_node, false);
+ nir_ssa_def *instance_node_addr = build_node_to_addr(device, b, global_bvh_node, false);
nir_store_deref(b, args->vars.instance_addr, instance_node_addr, 1);
- nir_ssa_def *instance_data = nir_build_load_global(
- b, 4, 32, instance_node_addr, .align_mul = 64, .align_offset = 0);
+ nir_ssa_def *instance_data =
+ nir_build_load_global(b, 4, 32, instance_node_addr, .align_mul = 64, .align_offset = 0);
nir_ssa_def *wto_matrix[3];
nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
- nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3),
- 1);
+ nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3), 1);
nir_ssa_def *instance_and_mask = nir_channel(b, instance_data, 2);
- nir_push_if(b, nir_ult(b, nir_iand(b, instance_and_mask, args->cull_mask),
- nir_imm_int(b, 1 << 24)));
+ nir_push_if(b, nir_ult(b, nir_iand(b, instance_and_mask, args->cull_mask), nir_imm_int(b, 1 << 24)));
{
nir_jump(b, nir_jump_continue);
}
nir_pop_if(b, NULL);
nir_store_deref(b, args->vars.top_stack, nir_load_deref(b, args->vars.stack), 1);
- nir_store_deref(b, args->vars.bvh_base,
- nir_pack_64_2x32(b, nir_trim_vector(b, instance_data, 2)),
- 1);
+ nir_store_deref(b, args->vars.bvh_base, nir_pack_64_2x32(b, nir_trim_vector(b, instance_data, 2)), 1);
/* Start traversing the instance at its root node */
nir_store_deref(b, args->vars.current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE), 0x1);
- nir_store_deref(b, args->vars.instance_bottom_node,
- nir_imm_int(b, RADV_BVH_ROOT_NODE), 1);
+ nir_store_deref(b, args->vars.instance_bottom_node, nir_imm_int(b, RADV_BVH_ROOT_NODE), 1);
nir_store_deref(b, args->vars.instance_top_node, bvh_node, 1);
/* Transform the ray into object space */
- nir_store_deref(b, args->vars.origin,
- nir_build_vec3_mat_mult(b, args->origin, wto_matrix, true), 7);
- nir_store_deref(b, args->vars.dir,
- nir_build_vec3_mat_mult(b, args->dir, wto_matrix, false), 7);
- nir_store_deref(b, args->vars.inv_dir,
- nir_fdiv(b, vec3ones, nir_load_deref(b, args->vars.dir)), 7);
+ nir_store_deref(b, args->vars.origin, nir_build_vec3_mat_mult(b, args->origin, wto_matrix, true), 7);
+ nir_store_deref(b, args->vars.dir, nir_build_vec3_mat_mult(b, args->dir, wto_matrix, false), 7);
+ nir_store_deref(b, args->vars.inv_dir, nir_fdiv(b, vec3ones, nir_load_deref(b, args->vars.dir)), 7);
}
nir_pop_if(b, NULL);
}
/* If we didn't run the intrinsic because the hardware didn't support it,
 * emulate ray/box intersection here */
result = intersect_ray_amd_software_box(
- device, b, global_bvh_node, nir_load_deref(b, args->vars.tmax),
- nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir),
- nir_load_deref(b, args->vars.inv_dir));
+ device, b, global_bvh_node, nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin),
+ nir_load_deref(b, args->vars.dir), nir_load_deref(b, args->vars.inv_dir));
}
/* box */
for (unsigned i = 4; i-- > 1;) {
nir_ssa_def *stack = nir_load_deref(b, args->vars.stack);
- nir_ssa_def *stack_ptr =
- nir_umod_imm(b, stack, args->stack_entries * args->stack_stride);
+ nir_ssa_def *stack_ptr = nir_umod_imm(b, stack, args->stack_entries * args->stack_stride);
args->stack_store_cb(b, stack_ptr, new_nodes[i], args);
- nir_store_deref(b, args->vars.stack, nir_iadd_imm(b, stack, args->stack_stride),
- 1);
+ nir_store_deref(b, args->vars.stack, nir_iadd_imm(b, stack, args->stack_stride), 1);
if (i == 1) {
nir_ssa_def *new_watermark =
- nir_iadd_imm(b, nir_load_deref(b, args->vars.stack),
- -args->stack_entries * args->stack_stride);
- new_watermark = nir_imax(b, nir_load_deref(b, args->vars.stack_low_watermark),
- new_watermark);
+ nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), -args->stack_entries * args->stack_stride);
+ new_watermark = nir_imax(b, nir_load_deref(b, args->vars.stack_low_watermark), new_watermark);
nir_store_deref(b, args->vars.stack_low_watermark, new_watermark, 0x1);
}
{
nir_ssa_def *next = nir_imm_int(b, RADV_BVH_INVALID_NODE);
for (unsigned i = 0; i < 3; ++i) {
- next = nir_bcsel(b, nir_ieq(b, prev_node, nir_channel(b, result, i)),
- nir_channel(b, result, i + 1), next);
+ next = nir_bcsel(b, nir_ieq(b, prev_node, nir_channel(b, result, i)), nir_channel(b, result, i + 1),
+ next);
}
nir_store_deref(b, args->vars.current_node, next, 0x1);
}
/* If we didn't run the intrinsic because the hardware didn't support it,
 * emulate ray/tri intersection here */
result = intersect_ray_amd_software_tri(
- device, b, global_bvh_node, nir_load_deref(b, args->vars.tmax),
- nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir),
- nir_load_deref(b, args->vars.inv_dir));
+ device, b, global_bvh_node, nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin),
+ nir_load_deref(b, args->vars.dir), nir_load_deref(b, args->vars.inv_dir));
}
insert_traversal_triangle_case(device, b, args, &ray_flags, result, global_bvh_node);
}
#include "radv_private.h"
-void nir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var_indices,
- uint32_t chan_1, uint32_t chan_2);
+void nir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var_indices, uint32_t chan_1,
+ uint32_t chan_2);
-nir_ssa_def *intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b,
- nir_ssa_def *bvh_node, nir_ssa_def *ray_tmax,
- nir_ssa_def *origin, nir_ssa_def *dir,
+nir_ssa_def *intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node,
+ nir_ssa_def *ray_tmax, nir_ssa_def *origin, nir_ssa_def *dir,
nir_ssa_def *inv_dir);
-nir_ssa_def *intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b,
- nir_ssa_def *bvh_node, nir_ssa_def *ray_tmax,
- nir_ssa_def *origin, nir_ssa_def *dir,
+nir_ssa_def *intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node,
+ nir_ssa_def *ray_tmax, nir_ssa_def *origin, nir_ssa_def *dir,
nir_ssa_def *inv_dir);
nir_ssa_def *build_addr_to_node(nir_builder *b, nir_ssa_def *addr);
-nir_ssa_def *nir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[],
- bool translation);
+nir_ssa_def *nir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[], bool translation);
void nir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_def **out);
nir_ssa_def *opaque;
};
-typedef void (*radv_aabb_intersection_cb)(nir_builder *b,
- struct radv_leaf_intersection *intersection,
+typedef void (*radv_aabb_intersection_cb)(nir_builder *b, struct radv_leaf_intersection *intersection,
const struct radv_ray_traversal_args *args);
struct radv_triangle_intersection {
nir_ssa_def *barycentrics;
};
-typedef void (*radv_triangle_intersection_cb)(nir_builder *b,
- struct radv_triangle_intersection *intersection,
+typedef void (*radv_triangle_intersection_cb)(nir_builder *b, struct radv_triangle_intersection *intersection,
const struct radv_ray_traversal_args *args,
const struct radv_ray_flags *ray_flags);
.flags = flags,
};
vars.idx = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "idx");
- vars.shader_va =
- nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "shader_va");
- vars.traversal_addr =
- nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "traversal_addr");
+ vars.shader_va = nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "shader_va");
+ vars.traversal_addr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "traversal_addr");
vars.arg = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "arg");
vars.stack_ptr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "stack_ptr");
- vars.shader_record_ptr =
- nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "shader_record_ptr");
+ vars.shader_record_ptr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "shader_record_ptr");
const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
- vars.accel_struct =
- nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "accel_struct");
- vars.cull_mask_and_flags =
- nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "cull_mask_and_flags");
- vars.sbt_offset =
- nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "sbt_offset");
- vars.sbt_stride =
- nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "sbt_stride");
- vars.miss_index =
- nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "miss_index");
+ vars.accel_struct = nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "accel_struct");
+ vars.cull_mask_and_flags = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "cull_mask_and_flags");
+ vars.sbt_offset = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "sbt_offset");
+ vars.sbt_stride = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "sbt_stride");
+ vars.miss_index = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "miss_index");
vars.origin = nir_variable_create(shader, nir_var_shader_temp, vec3_type, "ray_origin");
vars.tmin = nir_variable_create(shader, nir_var_shader_temp, glsl_float_type(), "ray_tmin");
vars.direction = nir_variable_create(shader, nir_var_shader_temp, vec3_type, "ray_direction");
vars.tmax = nir_variable_create(shader, nir_var_shader_temp, glsl_float_type(), "ray_tmax");
- vars.primitive_id =
- nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "primitive_id");
+ vars.primitive_id = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "primitive_id");
vars.geometry_id_and_flags =
nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "geometry_id_and_flags");
- vars.instance_addr =
- nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr");
+ vars.instance_addr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr");
vars.hit_kind = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "hit_kind");
vars.opaque = nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "opaque");
- vars.ahit_accept =
- nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "ahit_accept");
- vars.ahit_terminate =
- nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "ahit_terminate");
+ vars.ahit_accept = nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "ahit_accept");
+ vars.ahit_terminate = nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "ahit_terminate");
return vars;
}
* Remap all the variables between the two rt_variables structs for inlining.
*/
static void
-map_rt_variables(struct hash_table *var_remap, struct rt_variables *src,
- const struct rt_variables *dst)
+map_rt_variables(struct hash_table *var_remap, struct rt_variables *src, const struct rt_variables *dst)
{
_mesa_hash_table_insert(var_remap, src->idx, dst->idx);
_mesa_hash_table_insert(var_remap, src->shader_va, dst->shader_va);
create_inner_vars(nir_builder *b, const struct rt_variables *vars)
{
struct rt_variables inner_vars = *vars;
- inner_vars.idx =
- nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_idx");
- inner_vars.shader_record_ptr = nir_variable_create(
- b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "inner_shader_record_ptr");
+ inner_vars.idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_idx");
+ inner_vars.shader_record_ptr =
+ nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "inner_shader_record_ptr");
inner_vars.primitive_id =
nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_primitive_id");
- inner_vars.geometry_id_and_flags = nir_variable_create(
- b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_geometry_id_and_flags");
- inner_vars.tmax =
- nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "inner_tmax");
- inner_vars.instance_addr = nir_variable_create(b->shader, nir_var_shader_temp,
- glsl_uint64_t_type(), "inner_instance_addr");
- inner_vars.hit_kind =
- nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_hit_kind");
+ inner_vars.geometry_id_and_flags =
+ nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_geometry_id_and_flags");
+ inner_vars.tmax = nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "inner_tmax");
+ inner_vars.instance_addr =
+ nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "inner_instance_addr");
+ inner_vars.hit_kind = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_hit_kind");
return inner_vars;
}
insert_rt_return(nir_builder *b, const struct rt_variables *vars)
{
nir_store_var(b, vars->stack_ptr, nir_iadd_imm(b, nir_load_var(b, vars->stack_ptr), -16), 1);
- nir_store_var(b, vars->shader_va,
- nir_load_scratch(b, 1, 64, nir_load_var(b, vars->stack_ptr), .align_mul = 16), 1);
+ nir_store_var(b, vars->shader_va, nir_load_scratch(b, 1, 64, nir_load_var(b, vars->stack_ptr), .align_mul = 16), 1);
}
enum sbt_type {
{
nir_ssa_def *desc_base_addr = nir_load_sbt_base_amd(b);
- nir_ssa_def *desc =
- nir_pack_64_2x32(b, nir_build_load_smem_amd(b, 2, desc_base_addr, nir_imm_int(b, binding)));
+ nir_ssa_def *desc = nir_pack_64_2x32(b, nir_build_load_smem_amd(b, 2, desc_base_addr, nir_imm_int(b, binding)));
nir_ssa_def *stride_offset = nir_imm_int(b, binding + (binding == SBT_RAYGEN ? 8 : 16));
- nir_ssa_def *stride =
- nir_pack_64_2x32(b, nir_build_load_smem_amd(b, 2, desc_base_addr, stride_offset));
+ nir_ssa_def *stride = nir_pack_64_2x32(b, nir_build_load_smem_amd(b, 2, desc_base_addr, stride_offset));
return nir_iadd(b, desc, nir_imul(b, nir_u2u64(b, idx), stride));
}
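/* Editor's note: the SBT descriptor buffer is laid out with a table's 64-bit
 * base address at `binding` and its stride 8 bytes later for the raygen table
 * (16 for the others), so the returned pointer is just base + idx * stride,
 * i.e. the usual VkStridedDeviceAddressRegionKHR indexing. */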
static void
-load_sbt_entry(nir_builder *b, const struct rt_variables *vars, nir_ssa_def *idx,
- enum sbt_type binding, enum sbt_entry offset)
+load_sbt_entry(nir_builder *b, const struct rt_variables *vars, nir_ssa_def *idx, enum sbt_type binding,
+ enum sbt_entry offset)
{
nir_ssa_def *addr = get_sbt_ptr(b, idx, binding);
nir_ssa_def *load_addr = nir_iadd_imm(b, addr, offset);
switch (intr->intrinsic) {
case nir_intrinsic_rt_execute_callable: {
uint32_t size = align(nir_intrinsic_stack_size(intr), 16);
- nir_ssa_def *ret_ptr =
- nir_load_resume_shader_address_amd(&b_shader, nir_intrinsic_call_idx(intr));
+ nir_ssa_def *ret_ptr = nir_load_resume_shader_address_amd(&b_shader, nir_intrinsic_call_idx(intr));
ret_ptr = nir_ior_imm(&b_shader, ret_ptr, radv_get_rt_priority(shader->info.stage));
- nir_store_var(
- &b_shader, vars->stack_ptr,
- nir_iadd_imm_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), size), 1);
- nir_store_scratch(&b_shader, ret_ptr, nir_load_var(&b_shader, vars->stack_ptr),
- .align_mul = 16);
+ nir_store_var(&b_shader, vars->stack_ptr,
+ nir_iadd_imm_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), size), 1);
+ nir_store_scratch(&b_shader, ret_ptr, nir_load_var(&b_shader, vars->stack_ptr), .align_mul = 16);
- nir_store_var(
- &b_shader, vars->stack_ptr,
- nir_iadd_imm_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), 16), 1);
+ nir_store_var(&b_shader, vars->stack_ptr,
+ nir_iadd_imm_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), 16), 1);
load_sbt_entry(&b_shader, vars, intr->src[0].ssa, SBT_CALLABLE, SBT_RECURSIVE_PTR);
- nir_store_var(&b_shader, vars->arg,
- nir_iadd_imm(&b_shader, intr->src[1].ssa, -size - 16), 1);
+ nir_store_var(&b_shader, vars->arg, nir_iadd_imm(&b_shader, intr->src[1].ssa, -size - 16), 1);
vars->stack_size = MAX2(vars->stack_size, size + 16);
break;
}
case nir_intrinsic_rt_trace_ray: {
uint32_t size = align(nir_intrinsic_stack_size(intr), 16);
- nir_ssa_def *ret_ptr =
- nir_load_resume_shader_address_amd(&b_shader, nir_intrinsic_call_idx(intr));
+ nir_ssa_def *ret_ptr = nir_load_resume_shader_address_amd(&b_shader, nir_intrinsic_call_idx(intr));
ret_ptr = nir_ior_imm(&b_shader, ret_ptr, radv_get_rt_priority(shader->info.stage));
- nir_store_var(
- &b_shader, vars->stack_ptr,
- nir_iadd_imm_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), size), 1);
- nir_store_scratch(&b_shader, ret_ptr, nir_load_var(&b_shader, vars->stack_ptr),
- .align_mul = 16);
+ nir_store_var(&b_shader, vars->stack_ptr,
+ nir_iadd_imm_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), size), 1);
+ nir_store_scratch(&b_shader, ret_ptr, nir_load_var(&b_shader, vars->stack_ptr), .align_mul = 16);
- nir_store_var(
- &b_shader, vars->stack_ptr,
- nir_iadd_imm_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), 16), 1);
+ nir_store_var(&b_shader, vars->stack_ptr,
+ nir_iadd_imm_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), 16), 1);
- nir_store_var(&b_shader, vars->shader_va,
- nir_load_var(&b_shader, vars->traversal_addr), 1);
- nir_store_var(&b_shader, vars->arg,
- nir_iadd_imm(&b_shader, intr->src[10].ssa, -size - 16), 1);
+ nir_store_var(&b_shader, vars->shader_va, nir_load_var(&b_shader, vars->traversal_addr), 1);
+ nir_store_var(&b_shader, vars->arg, nir_iadd_imm(&b_shader, intr->src[10].ssa, -size - 16), 1);
vars->stack_size = MAX2(vars->stack_size, size + 16);
/* Per the SPIR-V extension spec we have to ignore some bits for some arguments. */
nir_store_var(&b_shader, vars->accel_struct, intr->src[0].ssa, 0x1);
nir_store_var(&b_shader, vars->cull_mask_and_flags,
- nir_ior(&b_shader, nir_ishl_imm(&b_shader, intr->src[2].ssa, 24),
- intr->src[1].ssa),
- 0x1);
- nir_store_var(&b_shader, vars->sbt_offset,
- nir_iand_imm(&b_shader, intr->src[3].ssa, 0xf), 0x1);
- nir_store_var(&b_shader, vars->sbt_stride,
- nir_iand_imm(&b_shader, intr->src[4].ssa, 0xf), 0x1);
- nir_store_var(&b_shader, vars->miss_index,
- nir_iand_imm(&b_shader, intr->src[5].ssa, 0xffff), 0x1);
+ nir_ior(&b_shader, nir_ishl_imm(&b_shader, intr->src[2].ssa, 24), intr->src[1].ssa), 0x1);
+ nir_store_var(&b_shader, vars->sbt_offset, nir_iand_imm(&b_shader, intr->src[3].ssa, 0xf), 0x1);
+ nir_store_var(&b_shader, vars->sbt_stride, nir_iand_imm(&b_shader, intr->src[4].ssa, 0xf), 0x1);
+ nir_store_var(&b_shader, vars->miss_index, nir_iand_imm(&b_shader, intr->src[5].ssa, 0xffff), 0x1);
nir_store_var(&b_shader, vars->origin, intr->src[6].ssa, 0x7);
nir_store_var(&b_shader, vars->tmin, intr->src[7].ssa, 0x1);
nir_store_var(&b_shader, vars->direction, intr->src[8].ssa, 0x7);
case nir_intrinsic_rt_resume: {
uint32_t size = align(nir_intrinsic_stack_size(intr), 16);
- nir_store_var(
- &b_shader, vars->stack_ptr,
- nir_iadd_imm(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), -size), 1);
+ nir_store_var(&b_shader, vars->stack_ptr,
+ nir_iadd_imm(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), -size), 1);
break;
}
case nir_intrinsic_rt_return_amd: {
case nir_intrinsic_load_scratch: {
nir_instr_rewrite_src_ssa(
instr, &intr->src[0],
- nir_iadd_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr),
- intr->src[0].ssa));
+ nir_iadd_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), intr->src[0].ssa));
continue;
}
case nir_intrinsic_store_scratch: {
nir_instr_rewrite_src_ssa(
instr, &intr->src[1],
- nir_iadd_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr),
- intr->src[1].ssa));
+ nir_iadd_nuw(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), intr->src[1].ssa));
continue;
}
case nir_intrinsic_load_rt_arg_scratch_offset_amd: {
nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
ret = nir_build_load_global(
&b_shader, 1, 32,
- nir_iadd_imm(&b_shader, instance_node_addr,
- offsetof(struct radv_bvh_instance_node, instance_id)));
+ nir_iadd_imm(&b_shader, instance_node_addr, offsetof(struct radv_bvh_instance_node, instance_id)));
break;
}
case nir_intrinsic_load_ray_flags: {
- ret = nir_iand_imm(&b_shader, nir_load_var(&b_shader, vars->cull_mask_and_flags),
- 0xFFFFFF);
+ ret = nir_iand_imm(&b_shader, nir_load_var(&b_shader, vars->cull_mask_and_flags), 0xFFFFFF);
break;
}
case nir_intrinsic_load_ray_hit_kind: {
nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_ssa_def *rows[3];
for (unsigned r = 0; r < 3; ++r)
- rows[r] = nir_build_load_global(
- &b_shader, 4, 32,
- nir_iadd_imm(&b_shader, instance_node_addr,
- offsetof(struct radv_bvh_instance_node, otw_matrix) + r * 16));
- ret =
- nir_vec3(&b_shader, nir_channel(&b_shader, rows[0], c),
- nir_channel(&b_shader, rows[1], c), nir_channel(&b_shader, rows[2], c));
+ rows[r] =
+ nir_build_load_global(&b_shader, 4, 32,
+ nir_iadd_imm(&b_shader, instance_node_addr,
+ offsetof(struct radv_bvh_instance_node, otw_matrix) + r * 16));
+ ret = nir_vec3(&b_shader, nir_channel(&b_shader, rows[0], c), nir_channel(&b_shader, rows[1], c),
+ nir_channel(&b_shader, rows[2], c));
break;
}
case nir_intrinsic_load_ray_object_origin: {
nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_ssa_def *wto_matrix[3];
nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix);
- ret = nir_build_vec3_mat_mult(&b_shader, nir_load_var(&b_shader, vars->origin),
- wto_matrix, true);
+ ret = nir_build_vec3_mat_mult(&b_shader, nir_load_var(&b_shader, vars->origin), wto_matrix, true);
break;
}
case nir_intrinsic_load_ray_object_direction: {
nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_ssa_def *wto_matrix[3];
nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix);
- ret = nir_build_vec3_mat_mult(&b_shader, nir_load_var(&b_shader, vars->direction),
- wto_matrix, false);
+ ret = nir_build_vec3_mat_mult(&b_shader, nir_load_var(&b_shader, vars->direction), wto_matrix, false);
break;
}
case nir_intrinsic_load_intersection_opaque_amd: {
break;
}
case nir_intrinsic_load_cull_mask: {
- ret =
- nir_ushr_imm(&b_shader, nir_load_var(&b_shader, vars->cull_mask_and_flags), 24);
+ ret = nir_ushr_imm(&b_shader, nir_load_var(&b_shader, vars->cull_mask_and_flags), 24);
break;
}
case nir_intrinsic_ignore_ray_intersection: {
case nir_intrinsic_report_ray_intersection: {
nir_push_if(
&b_shader,
- nir_iand(
- &b_shader,
- nir_fge(&b_shader, nir_load_var(&b_shader, vars->tmax), intr->src[0].ssa),
- nir_fge(&b_shader, intr->src[0].ssa, nir_load_var(&b_shader, vars->tmin))));
+ nir_iand(&b_shader, nir_fge(&b_shader, nir_load_var(&b_shader, vars->tmax), intr->src[0].ssa),
+ nir_fge(&b_shader, intr->src[0].ssa, nir_load_var(&b_shader, vars->tmin))));
{
nir_store_var(&b_shader, vars->ahit_accept, nir_imm_true(&b_shader), 0x1);
nir_store_var(&b_shader, vars->tmax, intr->src[0].ssa, 1);
nir_store_var(&b_shader, vars->hit_kind, intr->src[5].ssa, 0x1);
load_sbt_entry(&b_shader, vars, intr->src[0].ssa, SBT_HIT, SBT_RECURSIVE_PTR);
- nir_ssa_def *should_return =
- nir_test_mask(&b_shader, nir_load_var(&b_shader, vars->cull_mask_and_flags),
- SpvRayFlagsSkipClosestHitShaderKHRMask);
+ nir_ssa_def *should_return = nir_test_mask(&b_shader, nir_load_var(&b_shader, vars->cull_mask_and_flags),
+ SpvRayFlagsSkipClosestHitShaderKHRMask);
- if (!(vars->flags &
- VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR)) {
- should_return =
- nir_ior(&b_shader, should_return,
- nir_ieq_imm(&b_shader, nir_load_var(&b_shader, vars->shader_va), 0));
+ if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR)) {
+ should_return = nir_ior(&b_shader, should_return,
+ nir_ieq_imm(&b_shader, nir_load_var(&b_shader, vars->shader_va), 0));
}
/* should_return is set if we had a hit but we won't be calling the closest hit
if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR)) {
/* In case of a NULL miss shader, do nothing and just return. */
- nir_push_if(&b_shader,
- nir_ieq_imm(&b_shader, nir_load_var(&b_shader, vars->shader_va), 0));
+ nir_push_if(&b_shader, nir_ieq_imm(&b_shader, nir_load_var(&b_shader, vars->shader_va), 0));
insert_rt_return(&b_shader, vars);
nir_pop_if(&b_shader, NULL);
}
return false;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
- if (intrin->intrinsic != nir_intrinsic_load_deref &&
- intrin->intrinsic != nir_intrinsic_store_deref)
+ if (intrin->intrinsic != nir_intrinsic_load_deref && intrin->intrinsic != nir_intrinsic_store_deref)
return false;
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
} else if (bit_size == 32) {
components[comp] = nir_load_hit_attrib_amd(b, .base = base);
} else if (bit_size == 16) {
- components[comp] = nir_channel(
- b, nir_unpack_32_2x16(b, nir_load_hit_attrib_amd(b, .base = base)), comp_offset / 2);
+ components[comp] =
+ nir_channel(b, nir_unpack_32_2x16(b, nir_load_hit_attrib_amd(b, .base = base)), comp_offset / 2);
} else if (bit_size == 8) {
- components[comp] = nir_channel(
- b, nir_unpack_bits(b, nir_load_hit_attrib_amd(b, .base = base), 8), comp_offset);
+ components[comp] =
+ nir_channel(b, nir_unpack_bits(b, nir_load_hit_attrib_amd(b, .base = base), 8), comp_offset);
} else {
unreachable("Invalid bit_size");
}
nir_ssa_def *prev = nir_unpack_32_2x16(b, nir_load_hit_attrib_amd(b, .base = base));
nir_ssa_def *components[2];
for (uint32_t word = 0; word < 2; word++)
- components[word] = (word == comp_offset / 2) ? nir_channel(b, value, comp)
- : nir_channel(b, prev, word);
- nir_store_hit_attrib_amd(b, nir_pack_32_2x16(b, nir_vec(b, components, 2)),
- .base = base);
+ components[word] = (word == comp_offset / 2) ? nir_channel(b, value, comp) : nir_channel(b, prev, word);
+ nir_store_hit_attrib_amd(b, nir_pack_32_2x16(b, nir_vec(b, components, 2)), .base = base);
} else if (bit_size == 8) {
nir_ssa_def *prev = nir_unpack_bits(b, nir_load_hit_attrib_amd(b, .base = base), 8);
nir_ssa_def *components[4];
for (uint32_t byte = 0; byte < 4; byte++)
- components[byte] =
- (byte == comp_offset) ? nir_channel(b, value, comp) : nir_channel(b, prev, byte);
- nir_store_hit_attrib_amd(b, nir_pack_32_4x8(b, nir_vec(b, components, 4)),
- .base = base);
+ components[byte] = (byte == comp_offset) ? nir_channel(b, value, comp) : nir_channel(b, prev, byte);
+ nir_store_hit_attrib_amd(b, nir_pack_32_4x8(b, nir_vec(b, components, 4)), .base = base);
} else {
unreachable("Invalid bit_size");
}
static bool
lower_hit_attrib_derefs(nir_shader *shader)
{
- bool progress = nir_shader_instructions_pass(
- shader, lower_hit_attrib_deref, nir_metadata_block_index | nir_metadata_dominance, NULL);
+ bool progress = nir_shader_instructions_pass(shader, lower_hit_attrib_deref,
+ nir_metadata_block_index | nir_metadata_dominance, NULL);
if (progress) {
nir_remove_dead_derefs(shader);
nir_remove_dead_variables(shader, nir_var_ray_hit_attrib, NULL);
nir_ssa_def *offset;
if (!hit_attribs)
- offset = nir_imul_imm(&b,
- nir_iadd_imm(&b, nir_load_local_invocation_index(&b),
- nir_intrinsic_base(intrin) * workgroup_size),
- sizeof(uint32_t));
+ offset = nir_imul_imm(
+ &b, nir_iadd_imm(&b, nir_load_local_invocation_index(&b), nir_intrinsic_base(intrin) * workgroup_size),
+ sizeof(uint32_t));
if (intrin->intrinsic == nir_intrinsic_load_hit_attrib_amd) {
nir_ssa_def *ret;
}
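+ /* When no hit-attribute variables are provided, attributes spill to LDS, so reserve RADV_MAX_HIT_ATTRIB_SIZE bytes per invocation. */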
if (!hit_attribs)
- shader->info.shared_size =
- MAX2(shader->info.shared_size, workgroup_size * RADV_MAX_HIT_ATTRIB_SIZE);
+ shader->info.shared_size = MAX2(shader->info.shared_size, workgroup_size * RADV_MAX_HIT_ATTRIB_SIZE);
}
static void
uint32_t old_constant_data_size = dst->constant_data_size;
uint32_t base_offset = align(dst->constant_data_size, align_mul);
dst->constant_data_size = base_offset + src->constant_data_size;
- dst->constant_data =
- rerzalloc_size(dst, dst->constant_data, old_constant_data_size, dst->constant_data_size);
+ dst->constant_data = rerzalloc_size(dst, dst->constant_data, old_constant_data_size, dst->constant_data_size);
memcpy((char *)dst->constant_data + base_offset, src->constant_data, src->constant_data_size);
if (!base_offset)
}
static void
-insert_rt_case(nir_builder *b, nir_shader *shader, struct rt_variables *vars, nir_ssa_def *idx,
- uint32_t call_idx_base, uint32_t call_idx, unsigned stage_idx,
- struct radv_ray_tracing_stage *stages)
+insert_rt_case(nir_builder *b, nir_shader *shader, struct rt_variables *vars, nir_ssa_def *idx, uint32_t call_idx_base,
+ uint32_t call_idx, unsigned stage_idx, struct radv_ray_tracing_stage *stages)
{
- uint32_t workgroup_size = b->shader->info.workgroup_size[0] * b->shader->info.workgroup_size[1] *
- b->shader->info.workgroup_size[2];
+ uint32_t workgroup_size =
+ b->shader->info.workgroup_size[0] * b->shader->info.workgroup_size[1] * b->shader->info.workgroup_size[2];
struct hash_table *var_remap = _mesa_pointer_hash_table_create(NULL);
NIR_PASS(_, shader, lower_rt_derefs);
NIR_PASS(_, shader, lower_hit_attrib_derefs);
- NIR_PASS(_, shader, nir_lower_explicit_io, nir_var_function_temp,
- nir_address_format_32bit_offset);
+ NIR_PASS(_, shader, nir_lower_explicit_io, nir_var_function_temp, nir_address_format_32bit_offset);
return shader;
}
nir_ssa_def *hit_kind = nir_load_param(b, 2);
nir_ssa_def *scratch_offset = nir_load_param(b, 3);
- nir_deref_instr *commit =
- nir_build_deref_cast(b, commit_ptr, nir_var_function_temp, glsl_bool_type(), 0);
+ nir_deref_instr *commit = nir_build_deref_cast(b, commit_ptr, nir_var_function_temp, glsl_bool_type(), 0);
nir_foreach_block_safe (block, impl) {
nir_foreach_instr_safe (instr, block) {
*/
case nir_intrinsic_load_scratch:
b->cursor = nir_before_instr(instr);
- nir_instr_rewrite_src_ssa(instr, &intrin->src[0],
- nir_iadd_nuw(b, scratch_offset, intrin->src[0].ssa));
+ nir_instr_rewrite_src_ssa(instr, &intrin->src[0], nir_iadd_nuw(b, scratch_offset, intrin->src[0].ssa));
break;
case nir_intrinsic_store_scratch:
b->cursor = nir_before_instr(instr);
- nir_instr_rewrite_src_ssa(instr, &intrin->src[1],
- nir_iadd_nuw(b, scratch_offset, intrin->src[1].ssa));
+ nir_instr_rewrite_src_ssa(instr, &intrin->src[1], nir_iadd_nuw(b, scratch_offset, intrin->src[1].ssa));
break;
case nir_intrinsic_load_rt_arg_scratch_offset_amd:
b->cursor = nir_after_instr(instr);
ret.origin = nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_origin");
ret.dir = nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_dir");
- ret.inv_dir =
- nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_inv_dir");
- ret.sbt_offset_and_flags = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(),
- "traversal_sbt_offset_and_flags");
- ret.instance_addr =
- nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr");
+ ret.inv_dir = nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_inv_dir");
+ ret.sbt_offset_and_flags =
+ nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_sbt_offset_and_flags");
+ ret.instance_addr = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr");
ret.hit = nir_variable_create(b->shader, nir_var_shader_temp, glsl_bool_type(), "traversal_hit");
- ret.bvh_base = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(),
- "traversal_bvh_base");
- ret.stack =
- nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_stack_ptr");
- ret.top_stack = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(),
- "traversal_top_stack_ptr");
- ret.stack_low_watermark = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(),
- "traversal_stack_low_watermark");
- ret.current_node =
- nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "current_node;");
- ret.previous_node =
- nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "previous_node");
- ret.instance_top_node =
- nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "instance_top_node");
+ ret.bvh_base = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "traversal_bvh_base");
+ ret.stack = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_stack_ptr");
+ ret.top_stack = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_top_stack_ptr");
+ ret.stack_low_watermark =
+ nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_stack_low_watermark");
+ ret.current_node = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "current_node");
+ ret.previous_node = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "previous_node");
+ ret.instance_top_node = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "instance_top_node");
ret.instance_bottom_node =
nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "instance_bottom_node");
return ret;
/* Avoid emitting stages with the same shaders/handles multiple times. */
bool is_dup = false;
for (unsigned j = 0; j < i; ++j)
- if (data->pipeline->groups[j].handle.any_hit_index ==
- data->pipeline->groups[i].handle.any_hit_index)
+ if (data->pipeline->groups[j].handle.any_hit_index == data->pipeline->groups[i].handle.any_hit_index)
is_dup = true;
if (is_dup)
continue;
- nir_shader *nir_stage =
- radv_pipeline_cache_handle_to_nir(device, data->pipeline->stages[shader_id].shader);
+ nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, data->pipeline->stages[shader_id].shader);
assert(nir_stage);
- insert_rt_case(b, nir_stage, vars, sbt_idx, 0, data->pipeline->groups[i].handle.any_hit_index,
- shader_id, data->pipeline->stages);
+ insert_rt_case(b, nir_stage, vars, sbt_idx, 0, data->pipeline->groups[i].handle.any_hit_index, shader_id,
+ data->pipeline->stages);
ralloc_free(nir_stage);
}
static void
handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *intersection,
- const struct radv_ray_traversal_args *args,
- const struct radv_ray_flags *ray_flags)
+ const struct radv_ray_traversal_args *args, const struct radv_ray_flags *ray_flags)
{
struct traversal_data *data = args->data;
nir_ssa_def *geometry_id = nir_iand_imm(b, intersection->base.geometry_id_and_flags, 0xfffffff);
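+ /* Hit-group SBT index per the Vulkan spec: sbtRecordOffset + instance SBT offset + geometryIndex * sbtRecordStride. */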
- nir_ssa_def *sbt_idx = nir_iadd(
- b,
- nir_iadd(b, nir_load_var(b, data->vars->sbt_offset),
- nir_iand_imm(b, nir_load_var(b, data->trav_vars->sbt_offset_and_flags), 0xffffff)),
- nir_imul(b, nir_load_var(b, data->vars->sbt_stride), geometry_id));
+ nir_ssa_def *sbt_idx =
+ nir_iadd(b,
+ nir_iadd(b, nir_load_var(b, data->vars->sbt_offset),
+ nir_iand_imm(b, nir_load_var(b, data->trav_vars->sbt_offset_and_flags), 0xffffff)),
+ nir_imul(b, nir_load_var(b, data->vars->sbt_stride), geometry_id));
- nir_ssa_def *hit_kind =
- nir_bcsel(b, intersection->frontface, nir_imm_int(b, 0xFE), nir_imm_int(b, 0xFF));
+ nir_ssa_def *hit_kind = nir_bcsel(b, intersection->frontface, nir_imm_int(b, 0xFE), nir_imm_int(b, 0xFF));
nir_ssa_def *prev_barycentrics = nir_load_var(b, data->barycentrics);
nir_store_var(b, data->barycentrics, intersection->barycentrics, 0x3);
struct rt_variables inner_vars = create_inner_vars(b, data->vars);
nir_store_var(b, inner_vars.primitive_id, intersection->base.primitive_id, 1);
- nir_store_var(b, inner_vars.geometry_id_and_flags, intersection->base.geometry_id_and_flags,
- 1);
+ nir_store_var(b, inner_vars.geometry_id_and_flags, intersection->base.geometry_id_and_flags, 1);
nir_store_var(b, inner_vars.tmax, intersection->t, 0x1);
- nir_store_var(b, inner_vars.instance_addr, nir_load_var(b, data->trav_vars->instance_addr),
- 0x1);
+ nir_store_var(b, inner_vars.instance_addr, nir_load_var(b, data->trav_vars->instance_addr), 0x1);
nir_store_var(b, inner_vars.hit_kind, hit_kind, 0x1);
load_sbt_entry(b, &inner_vars, sbt_idx, SBT_HIT, SBT_ANY_HIT_IDX);
nir_store_var(b, data->vars->primitive_id, intersection->base.primitive_id, 1);
nir_store_var(b, data->vars->geometry_id_and_flags, intersection->base.geometry_id_and_flags, 1);
nir_store_var(b, data->vars->tmax, intersection->t, 0x1);
- nir_store_var(b, data->vars->instance_addr, nir_load_var(b, data->trav_vars->instance_addr),
- 0x1);
+ nir_store_var(b, data->vars->instance_addr, nir_load_var(b, data->trav_vars->instance_addr), 0x1);
nir_store_var(b, data->vars->hit_kind, hit_kind, 0x1);
nir_store_var(b, data->vars->idx, sbt_idx, 1);
struct traversal_data *data = args->data;
nir_ssa_def *geometry_id = nir_iand_imm(b, intersection->geometry_id_and_flags, 0xfffffff);
- nir_ssa_def *sbt_idx = nir_iadd(
- b,
- nir_iadd(b, nir_load_var(b, data->vars->sbt_offset),
- nir_iand_imm(b, nir_load_var(b, data->trav_vars->sbt_offset_and_flags), 0xffffff)),
- nir_imul(b, nir_load_var(b, data->vars->sbt_stride), geometry_id));
+ nir_ssa_def *sbt_idx =
+ nir_iadd(b,
+ nir_iadd(b, nir_load_var(b, data->vars->sbt_offset),
+ nir_iand_imm(b, nir_load_var(b, data->trav_vars->sbt_offset_and_flags), 0xffffff)),
+ nir_imul(b, nir_load_var(b, data->vars->sbt_stride), geometry_id));
struct rt_variables inner_vars = create_inner_vars(b, data->vars);
/* Avoid emitting stages with the same shaders/handles multiple times. */
bool is_dup = false;
for (unsigned j = 0; j < i; ++j)
- if (data->pipeline->groups[j].handle.intersection_index ==
- data->pipeline->groups[i].handle.intersection_index)
+ if (data->pipeline->groups[j].handle.intersection_index == data->pipeline->groups[i].handle.intersection_index)
is_dup = true;
if (is_dup)
continue;
- nir_shader *nir_stage =
- radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[shader_id].shader);
+ nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[shader_id].shader);
assert(nir_stage);
nir_shader *any_hit_stage = NULL;
if (any_hit_shader_id != VK_SHADER_UNUSED_KHR) {
- any_hit_stage = radv_pipeline_cache_handle_to_nir(
- data->device, data->pipeline->stages[any_hit_shader_id].shader);
+ any_hit_stage =
+ radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[any_hit_shader_id].shader);
assert(any_hit_stage);
/* reserve stack size for any_hit before it is inlined */
}
insert_rt_case(b, nir_stage, &inner_vars, nir_load_var(b, inner_vars.idx), 0,
- data->pipeline->groups[i].handle.intersection_index, shader_id,
- data->pipeline->stages);
+ data->pipeline->groups[i].handle.intersection_index, shader_id, data->pipeline->stages);
ralloc_free(nir_stage);
}
nir_store_var(b, data->vars->primitive_id, intersection->primitive_id, 1);
nir_store_var(b, data->vars->geometry_id_and_flags, intersection->geometry_id_and_flags, 1);
nir_store_var(b, data->vars->tmax, nir_load_var(b, inner_vars.tmax), 0x1);
- nir_store_var(b, data->vars->instance_addr, nir_load_var(b, data->trav_vars->instance_addr),
- 0x1);
+ nir_store_var(b, data->vars->instance_addr, nir_load_var(b, data->trav_vars->instance_addr), 0x1);
nir_store_var(b, data->vars->idx, sbt_idx, 1);
nir_store_var(b, data->trav_vars->hit, nir_imm_true(b), 1);
- nir_ssa_def *terminate_on_first_hit =
- nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask);
+ nir_ssa_def *terminate_on_first_hit = nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask);
nir_ssa_def *ray_terminated = nir_load_var(b, data->vars->ahit_terminate);
nir_push_if(b, nir_ior(b, terminate_on_first_hit, ray_terminated));
{
}
static void
-store_stack_entry(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value,
- const struct radv_ray_traversal_args *args)
+store_stack_entry(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value, const struct radv_ray_traversal_args *args)
{
nir_store_shared(b, value, index, .base = 0, .align_mul = 4);
}
nir_shader *
radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
- const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
- const struct radv_pipeline_key *key)
+ const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, const struct radv_pipeline_key *key)
{
/* Create the traversal shader as an intersection shader to prevent validation failures due to
* invalid variable modes.*/
b.shader->info.internal = false;
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = device->physical_device->rt_wave_size == 64 ? 8 : 4;
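+ /* Reserve one MAX_STACK_ENTRY_COUNT-deep 32-bit traversal stack per lane in LDS. */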
- b.shader->info.shared_size =
- device->physical_device->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t);
+ b.shader->info.shared_size = device->physical_device->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t);
struct rt_variables vars = create_rt_variables(b.shader, pCreateInfo->flags);
/* Register storage for hit attributes */
nir_variable *hit_attribs[RADV_MAX_HIT_ATTRIB_SIZE / sizeof(uint32_t)];
for (uint32_t i = 0; i < ARRAY_SIZE(hit_attribs); i++)
- hit_attribs[i] = nir_local_variable_create(nir_shader_get_entrypoint(b.shader),
- glsl_uint_type(), "ahit_attrib");
+ hit_attribs[i] = nir_local_variable_create(nir_shader_get_entrypoint(b.shader), glsl_uint_type(), "ahit_attrib");
- nir_variable *barycentrics = nir_variable_create(
- b.shader, nir_var_ray_hit_attrib, glsl_vector_type(GLSL_TYPE_FLOAT, 2), "barycentrics");
+ nir_variable *barycentrics =
+ nir_variable_create(b.shader, nir_var_ray_hit_attrib, glsl_vector_type(GLSL_TYPE_FLOAT, 2), "barycentrics");
barycentrics->data.driver_location = 0;
/* initialize trace_ray arguments */
nir_store_var(&b, trav_vars.hit, nir_imm_false(&b), 1);
nir_ssa_def *bvh_offset = nir_build_load_global(
- &b, 1, 32,
- nir_iadd_imm(&b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)),
+ &b, 1, 32, nir_iadd_imm(&b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)),
.access = ACCESS_NON_WRITEABLE);
nir_ssa_def *root_bvh_base = nir_iadd(&b, accel_struct, nir_u2u64(&b, bvh_offset));
root_bvh_base = build_addr_to_node(&b, root_bvh_base);
nir_store_var(&b, trav_vars.sbt_offset_and_flags, nir_imm_int(&b, 0), 1);
nir_store_var(&b, trav_vars.instance_addr, nir_imm_int64(&b, 0), 1);
- nir_store_var(&b, trav_vars.stack,
- nir_imul_imm(&b, nir_load_local_invocation_index(&b), sizeof(uint32_t)), 1);
+ nir_store_var(&b, trav_vars.stack, nir_imul_imm(&b, nir_load_local_invocation_index(&b), sizeof(uint32_t)), 1);
nir_store_var(&b, trav_vars.stack_low_watermark, nir_load_var(&b, trav_vars.stack), 1);
nir_store_var(&b, trav_vars.current_node, nir_imm_int(&b, RADV_BVH_ROOT_NODE), 0x1);
nir_store_var(&b, trav_vars.previous_node, nir_imm_int(&b, RADV_BVH_INVALID_NODE), 0x1);
nir_store_var(&b, trav_vars.instance_top_node, nir_imm_int(&b, RADV_BVH_INVALID_NODE), 0x1);
- nir_store_var(&b, trav_vars.instance_bottom_node, nir_imm_int(&b, RADV_BVH_NO_INSTANCE_ROOT),
- 0x1);
+ nir_store_var(&b, trav_vars.instance_bottom_node, nir_imm_int(&b, RADV_BVH_NO_INSTANCE_ROOT), 0x1);
nir_store_var(&b, trav_vars.top_stack, nir_imm_int(&b, -1), 1);
.stack_base = 0,
.stack_store_cb = store_stack_entry,
.stack_load_cb = load_stack_entry,
- .aabb_cb = (pCreateInfo->flags & VK_PIPELINE_CREATE_RAY_TRACING_SKIP_AABBS_BIT_KHR)
- ? NULL
- : handle_candidate_aabb,
+ .aabb_cb =
+ (pCreateInfo->flags & VK_PIPELINE_CREATE_RAY_TRACING_SKIP_AABBS_BIT_KHR) ? NULL : handle_candidate_aabb,
.triangle_cb = (pCreateInfo->flags & VK_PIPELINE_CREATE_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR)
? NULL
: handle_candidate_triangle,
{
for (int i = 0; i < ARRAY_SIZE(hit_attribs); ++i)
nir_store_hit_attrib_amd(&b, nir_load_var(&b, hit_attribs[i]), .base = i);
- nir_execute_closest_hit_amd(
- &b, nir_load_var(&b, vars.idx), nir_load_var(&b, vars.tmax),
- nir_load_var(&b, vars.primitive_id), nir_load_var(&b, vars.instance_addr),
- nir_load_var(&b, vars.geometry_id_and_flags), nir_load_var(&b, vars.hit_kind));
+ nir_execute_closest_hit_amd(&b, nir_load_var(&b, vars.idx), nir_load_var(&b, vars.tmax),
+ nir_load_var(&b, vars.primitive_id), nir_load_var(&b, vars.instance_addr),
+ nir_load_var(&b, vars.geometry_id_and_flags), nir_load_var(&b, vars.hit_kind));
}
nir_push_else(&b, NULL);
{
gl_shader_stage stage = b->shader->info.stage;
nir_ssa_def *prio = nir_iand_imm(b, shader_va, radv_rt_priority_mask);
nir_ssa_def *ballot = nir_ballot(b, 1, wave_size, nir_imm_bool(b, true));
- nir_ssa_def *ballot_traversal =
- nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_traversal));
- nir_ssa_def *ballot_hit_miss =
- nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_hit_miss));
- nir_ssa_def *ballot_callable =
- nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_callable));
+ nir_ssa_def *ballot_traversal = nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_traversal));
+ nir_ssa_def *ballot_hit_miss = nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_hit_miss));
+ nir_ssa_def *ballot_callable = nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_callable));
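+ /* Ballot per RT priority class so the wave can converge on a single shader type before the indirect jump. */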
if (stage != MESA_SHADER_CALLABLE && stage != MESA_SHADER_INTERSECTION)
ballot = nir_bcsel(b, nir_ine_imm(b, ballot_traversal, 0), ballot_traversal, ballot);
void
radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
- const struct radv_shader_args *args, const struct radv_shader_info *info,
- uint32_t *stack_size, bool resume_shader)
+ const struct radv_shader_args *args, const struct radv_shader_info *info, uint32_t *stack_size,
+ bool resume_shader)
{
nir_builder b;
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
nir_ssa_def *shader_va = ac_nir_load_arg(&b, &args->ac, args->ac.rt.next_shader);
shader_va = nir_pack_64_2x32(&b, shader_va);
nir_store_var(&b, vars.shader_va, shader_va, 1);
- nir_store_var(&b, vars.stack_ptr,
- ac_nir_load_arg(&b, &args->ac, args->ac.rt.dynamic_callable_stack_base), 1);
+ nir_store_var(&b, vars.stack_ptr, ac_nir_load_arg(&b, &args->ac, args->ac.rt.dynamic_callable_stack_base), 1);
nir_ssa_def *record_ptr = ac_nir_load_arg(&b, &args->ac, args->ac.rt.shader_record);
nir_store_var(&b, vars.shader_record_ptr, nir_pack_64_2x32(&b, record_ptr), 1);
nir_store_var(&b, vars.arg, ac_nir_load_arg(&b, &args->ac, args->ac.rt.payload_offset), 1);
nir_ssa_def *accel_struct = ac_nir_load_arg(&b, &args->ac, args->ac.rt.accel_struct);
nir_store_var(&b, vars.accel_struct, nir_pack_64_2x32(&b, accel_struct), 1);
- nir_store_var(&b, vars.cull_mask_and_flags,
- ac_nir_load_arg(&b, &args->ac, args->ac.rt.cull_mask_and_flags), 1);
+ nir_store_var(&b, vars.cull_mask_and_flags, ac_nir_load_arg(&b, &args->ac, args->ac.rt.cull_mask_and_flags), 1);
nir_store_var(&b, vars.sbt_offset, ac_nir_load_arg(&b, &args->ac, args->ac.rt.sbt_offset), 1);
nir_store_var(&b, vars.sbt_stride, ac_nir_load_arg(&b, &args->ac, args->ac.rt.sbt_stride), 1);
nir_store_var(&b, vars.miss_index, ac_nir_load_arg(&b, &args->ac, args->ac.rt.miss_index), 1);
nir_store_var(&b, vars.origin, ac_nir_load_arg(&b, &args->ac, args->ac.rt.ray_origin), 0x7);
nir_store_var(&b, vars.tmin, ac_nir_load_arg(&b, &args->ac, args->ac.rt.ray_tmin), 1);
- nir_store_var(&b, vars.direction, ac_nir_load_arg(&b, &args->ac, args->ac.rt.ray_direction),
- 0x7);
+ nir_store_var(&b, vars.direction, ac_nir_load_arg(&b, &args->ac, args->ac.rt.ray_direction), 0x7);
nir_store_var(&b, vars.tmax, ac_nir_load_arg(&b, &args->ac, args->ac.rt.ray_tmax), 1);
- nir_store_var(&b, vars.primitive_id, ac_nir_load_arg(&b, &args->ac, args->ac.rt.primitive_id),
- 1);
+ nir_store_var(&b, vars.primitive_id, ac_nir_load_arg(&b, &args->ac, args->ac.rt.primitive_id), 1);
nir_ssa_def *instance_addr = ac_nir_load_arg(&b, &args->ac, args->ac.rt.instance_addr);
nir_store_var(&b, vars.instance_addr, nir_pack_64_2x32(&b, instance_addr), 1);
- nir_store_var(&b, vars.geometry_id_and_flags,
- ac_nir_load_arg(&b, &args->ac, args->ac.rt.geometry_id_and_flags), 1);
+ nir_store_var(&b, vars.geometry_id_and_flags, ac_nir_load_arg(&b, &args->ac, args->ac.rt.geometry_id_and_flags), 1);
nir_store_var(&b, vars.hit_kind, ac_nir_load_arg(&b, &args->ac, args->ac.rt.hit_kind), 1);
/* guard the shader, so that only the correct invocations execute it */
ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_pc, next);
/* store back all variables to registers */
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.dynamic_callable_stack_base,
- nir_load_var(&b, vars.stack_ptr));
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.dynamic_callable_stack_base, nir_load_var(&b, vars.stack_ptr));
ac_nir_store_arg(&b, &args->ac, args->ac.rt.next_shader, nir_load_var(&b, vars.shader_va));
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_record,
- nir_load_var(&b, vars.shader_record_ptr));
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_record, nir_load_var(&b, vars.shader_record_ptr));
ac_nir_store_arg(&b, &args->ac, args->ac.rt.payload_offset, nir_load_var(&b, vars.arg));
ac_nir_store_arg(&b, &args->ac, args->ac.rt.accel_struct, nir_load_var(&b, vars.accel_struct));
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.cull_mask_and_flags,
- nir_load_var(&b, vars.cull_mask_and_flags));
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.cull_mask_and_flags, nir_load_var(&b, vars.cull_mask_and_flags));
ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_offset, nir_load_var(&b, vars.sbt_offset));
ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_stride, nir_load_var(&b, vars.sbt_stride));
ac_nir_store_arg(&b, &args->ac, args->ac.rt.miss_index, nir_load_var(&b, vars.miss_index));
ac_nir_store_arg(&b, &args->ac, args->ac.rt.primitive_id, nir_load_var(&b, vars.primitive_id));
ac_nir_store_arg(&b, &args->ac, args->ac.rt.instance_addr, nir_load_var(&b, vars.instance_addr));
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.geometry_id_and_flags,
- nir_load_var(&b, vars.geometry_id_and_flags));
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.geometry_id_and_flags, nir_load_var(&b, vars.geometry_id_and_flags));
ac_nir_store_arg(&b, &args->ac, args->ac.rt.hit_kind, nir_load_var(&b, vars.hit_kind));
nir_metadata_preserve(impl, nir_metadata_none);
/* cleanup passes */
NIR_PASS_V(shader, nir_lower_global_vars_to_local);
NIR_PASS_V(shader, nir_lower_vars_to_ssa);
- if (shader->info.stage == MESA_SHADER_CLOSEST_HIT ||
- shader->info.stage == MESA_SHADER_INTERSECTION)
+ if (shader->info.stage == MESA_SHADER_CLOSEST_HIT || shader->info.stage == MESA_SHADER_INTERSECTION)
NIR_PASS_V(shader, lower_hit_attribs, NULL, info->wave_size);
}
{
switch (filter) {
case VK_FILTER_NEAREST:
- return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT
- : V_008F38_SQ_TEX_XY_FILTER_POINT);
+ return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT : V_008F38_SQ_TEX_XY_FILTER_POINT);
case VK_FILTER_LINEAR:
- return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR
- : V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
+ return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR : V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
case VK_FILTER_CUBIC_EXT:
default:
fprintf(stderr, "illegal texture filter");
for (slot = 0; slot < RADV_BORDER_COLOR_COUNT; slot++) {
if (!device->border_color_data.used[slot]) {
/* Copy to the GPU wrt endian-ness. */
- util_memcpy_cpu_to_le32(&device->border_color_data.colors_gpu_ptr[slot], &value,
- sizeof(VkClearColorValue));
+ util_memcpy_cpu_to_le32(&device->border_color_data.colors_gpu_ptr[slot], &value, sizeof(VkClearColorValue));
device->border_color_data.used[slot] = true;
break;
}
static void
-radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler,
- const VkSamplerCreateInfo *pCreateInfo)
+radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler, const VkSamplerCreateInfo *pCreateInfo)
{
uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
- bool compat_mode = device->physical_device->rad_info.gfx_level == GFX8 ||
- device->physical_device->rad_info.gfx_level == GFX9;
+ bool compat_mode =
+ device->physical_device->rad_info.gfx_level == GFX8 || device->physical_device->rad_info.gfx_level == GFX9;
unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
- bool trunc_coord =
- (pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST) ||
- device->physical_device->rad_info.conformant_trunc_coord;
+ bool trunc_coord = (pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST) ||
+ device->physical_device->rad_info.conformant_trunc_coord;
bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
- VkBorderColor border_color =
- uses_border_color ? pCreateInfo->borderColor : VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
+ VkBorderColor border_color = uses_border_color ? pCreateInfo->borderColor : VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
uint32_t border_color_ptr;
bool disable_cube_wrap = pCreateInfo->flags & VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT;
sampler->border_color_slot = RADV_BORDER_COLOR_COUNT;
- if (border_color == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT ||
- border_color == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
+ if (border_color == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT || border_color == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
assert(custom_border_color);
- sampler->border_color_slot =
- radv_register_border_color(device, custom_border_color->customBorderColor);
+ sampler->border_color_slot = radv_register_border_color(device, custom_border_color->customBorderColor);
/* Did we fail to find a slot? */
if (sampler->border_color_slot == RADV_BORDER_COLOR_COUNT) {
}
/* If we don't have a custom color, set the ptr to 0 */
- border_color_ptr =
- sampler->border_color_slot != RADV_BORDER_COLOR_COUNT ? sampler->border_color_slot : 0;
-
- sampler->state[0] =
- (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
- S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
- S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
- S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | S_008F30_DEPTH_COMPARE_FUNC(depth_compare_func) |
- S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
- S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) |
- S_008F30_DISABLE_CUBE_WRAP(disable_cube_wrap) | S_008F30_COMPAT_MODE(compat_mode) |
- S_008F30_FILTER_MODE(filter_mode) | S_008F30_TRUNC_COORD(trunc_coord));
+ border_color_ptr = sampler->border_color_slot != RADV_BORDER_COLOR_COUNT ? sampler->border_color_slot : 0;
+
+ sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
+ S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
+ S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
+ S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | S_008F30_DEPTH_COMPARE_FUNC(depth_compare_func) |
+ S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
+ S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) |
+ S_008F30_DISABLE_CUBE_WRAP(disable_cube_wrap) | S_008F30_COMPAT_MODE(compat_mode) |
+ S_008F30_FILTER_MODE(filter_mode) | S_008F30_TRUNC_COORD(trunc_coord));
sampler->state[1] = (S_008F34_MIN_LOD(radv_float_to_ufixed(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
S_008F34_MAX_LOD(radv_float_to_ufixed(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
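+ /* MIN_LOD and MAX_LOD are unsigned 4.8 fixed point, hence the [0, 15] clamp. */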
sampler->state[3] = S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color));
if (device->physical_device->rad_info.gfx_level >= GFX10) {
- sampler->state[2] |=
- S_008F38_LOD_BIAS(radv_float_to_sfixed(CLAMP(pCreateInfo->mipLodBias, -32, 31), 8)) |
- S_008F38_ANISO_OVERRIDE_GFX10(device->instance->disable_aniso_single_level);
+ sampler->state[2] |= S_008F38_LOD_BIAS(radv_float_to_sfixed(CLAMP(pCreateInfo->mipLodBias, -32, 31), 8)) |
+ S_008F38_ANISO_OVERRIDE_GFX10(device->instance->disable_aniso_single_level);
} else {
- sampler->state[2] |=
- S_008F38_LOD_BIAS(radv_float_to_sfixed(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
- S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.gfx_level <= GFX8) |
- S_008F38_FILTER_PREC_FIX(1) |
- S_008F38_ANISO_OVERRIDE_GFX8(device->instance->disable_aniso_single_level &&
- device->physical_device->rad_info.gfx_level >= GFX8);
+ sampler->state[2] |= S_008F38_LOD_BIAS(radv_float_to_sfixed(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
+ S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.gfx_level <= GFX8) |
+ S_008F38_FILTER_PREC_FIX(1) |
+ S_008F38_ANISO_OVERRIDE_GFX8(device->instance->disable_aniso_single_level &&
+ device->physical_device->rad_info.gfx_level >= GFX8);
}
if (device->physical_device->rad_info.gfx_level >= GFX11) {
}
VKAPI_ATTR VkResult VKAPI_CALL
-radv_CreateSampler(VkDevice _device, const VkSamplerCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator, VkSampler *pSampler)
+radv_CreateSampler(VkDevice _device, const VkSamplerCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator,
+ VkSampler *pSampler)
{
RADV_FROM_HANDLE(radv_device, device, _device);
struct radv_sampler *sampler;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
- sampler = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ sampler = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!sampler)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
radv_init_sampler(device, sampler, pCreateInfo);
- sampler->ycbcr_sampler =
- ycbcr_conversion ? vk_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL;
+ sampler->ycbcr_sampler = ycbcr_conversion ? vk_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL;
*pSampler = radv_sampler_to_handle(sampler);
return VK_SUCCESS;
#include "radv_private.h"
static bool
-radv_sdma_v4_v5_copy_image_to_buffer(struct radv_device *device, struct radeon_cmdbuf *cs,
- struct radv_image *image, struct radv_buffer *buffer,
- const VkBufferImageCopy2 *region)
+radv_sdma_v4_v5_copy_image_to_buffer(struct radv_device *device, struct radeon_cmdbuf *cs, struct radv_image *image,
+ struct radv_buffer *buffer, const VkBufferImageCopy2 *region)
{
assert(image->plane_count == 1);
unsigned bpp = image->planes[0].surface.bpe;
src_address += image->planes[0].surface.u.gfx9.offset[0];
- radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
- CIK_SDMA_COPY_SUB_OPCODE_LINEAR, (tmz ? 4 : 0)));
+ radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_LINEAR, (tmz ? 4 : 0)));
radeon_emit(cs, bytes - 1);
radeon_emit(cs, 0);
radeon_emit(cs, src_address);
ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs, 14 + (dcc ? 3 : 0));
- radeon_emit(cs,
- CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW,
- (tmz ? 4 : 0)) |
- dcc << 19 | (is_v5 ? 0 : 0 /* tiled->buffer.b.b.last_level */) << 20 |
- 1u << 31);
- radeon_emit(cs,
- (uint32_t)tiled_address | (image->planes[0].surface.tile_swizzle << 8));
+ radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW, (tmz ? 4 : 0)) |
+ dcc << 19 | (is_v5 ? 0 : 0 /* tiled->buffer.b.b.last_level */) << 20 | 1u << 31);
+ radeon_emit(cs, (uint32_t)tiled_address | (image->planes[0].surface.tile_swizzle << 8));
radeon_emit(cs, (uint32_t)(tiled_address >> 32));
radeon_emit(cs, 0);
radeon_emit(cs, ((tiled_width - 1) << 16));
radeon_emit(cs, (tiled_height - 1));
- radeon_emit(
- cs,
- util_logbase2(bpp) | image->planes[0].surface.u.gfx9.swizzle_mode << 3 |
- image->planes[0].surface.u.gfx9.resource_type << 9 |
- (is_v5 ? 0 /* tiled->buffer.b.b.last_level */ : image->planes[0].surface.u.gfx9.epitch)
- << 16);
+ radeon_emit(cs, util_logbase2(bpp) | image->planes[0].surface.u.gfx9.swizzle_mode << 3 |
+ image->planes[0].surface.u.gfx9.resource_type << 9 |
+ (is_v5 ? 0 /* tiled->buffer.b.b.last_level */ : image->planes[0].surface.u.gfx9.epitch) << 16);
radeon_emit(cs, (uint32_t)linear_address);
radeon_emit(cs, (uint32_t)(linear_address >> 32));
radeon_emit(cs, 0);
unsigned hw_fmt, hw_type;
desc = vk_format_description(image->vk.format);
- hw_fmt = ac_get_cb_format(device->physical_device->rad_info.gfx_level,
- vk_format_to_pipe_format(format));
+ hw_fmt = ac_get_cb_format(device->physical_device->rad_info.gfx_level, vk_format_to_pipe_format(format));
hw_type = radv_translate_buffer_numformat(desc, vk_format_get_first_non_void_channel(format));
/* Add metadata */
radeon_emit(cs, (uint32_t)md_address);
radeon_emit(cs, (uint32_t)(md_address >> 32));
- radeon_emit(cs,
- hw_fmt | vi_alpha_is_on_msb(device, format) << 8 | hw_type << 9 |
- image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size << 24 |
- V_028C78_MAX_BLOCK_SIZE_256B << 26 | tmz << 29 |
- image->planes[0].surface.u.gfx9.color.dcc.pipe_aligned << 31);
+ radeon_emit(cs, hw_fmt | vi_alpha_is_on_msb(device, format) << 8 | hw_type << 9 |
+ image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size << 24 |
+ V_028C78_MAX_BLOCK_SIZE_256B << 26 | tmz << 29 |
+ image->planes[0].surface.u.gfx9.color.dcc.pipe_aligned << 31);
}
assert(cs->cdw <= cdw_max);
}
void
-radv_sdma_copy_buffer(struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t src_va,
- uint64_t dst_va, uint64_t size)
+radv_sdma_copy_buffer(struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t src_va, uint64_t dst_va,
+ uint64_t size)
{
if (size == 0)
return;
enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
- unsigned max_size_per_packet =
- gfx_level >= GFX10_3 ? GFX103_SDMA_COPY_MAX_SIZE : CIK_SDMA_COPY_MAX_SIZE;
+ unsigned max_size_per_packet = gfx_level >= GFX10_3 ? GFX103_SDMA_COPY_MAX_SIZE : CIK_SDMA_COPY_MAX_SIZE;
unsigned align = ~0u;
unsigned ncopy = DIV_ROUND_UP(size, max_size_per_packet);
bool tmz = false;
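+ /* Split the copy into COPY_LINEAR packets of at most max_size_per_packet bytes each. */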
for (unsigned i = 0; i < ncopy; i++) {
unsigned csize = size >= 4 ? MIN2(size & align, max_size_per_packet) : size;
- radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
- (tmz ? 1u : 0) << 2));
+ radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_LINEAR, (tmz ? 1u : 0) << 2));
radeon_emit(cs, gfx_level >= GFX9 ? csize - 1 : csize);
radeon_emit(cs, 0); /* src/dst endian swap */
radeon_emit(cs, src_va);
#include "aco_interface.h"
#include "sid.h"
#include "vk_format.h"
-#include "vk_sync.h"
#include "vk_semaphore.h"
+#include "vk_sync.h"
#include "aco_shader_info.h"
#include "radv_aco_shader_info.h"
.has_fsub = true,
.has_isub = true,
.has_sdot_4x8 = device->rad_info.has_accelerated_dot_product,
- .has_sudot_4x8 =
- device->rad_info.has_accelerated_dot_product && device->rad_info.gfx_level >= GFX11,
+ .has_sudot_4x8 = device->rad_info.has_accelerated_dot_product && device->rad_info.gfx_level >= GFX11,
.has_udot_4x8 = device->rad_info.has_accelerated_dot_product,
- .has_dot_2x16 =
- device->rad_info.has_accelerated_dot_product && device->rad_info.gfx_level < GFX11,
+ .has_dot_2x16 = device->rad_info.has_accelerated_dot_product && device->rad_info.gfx_level < GFX11,
.has_find_msb_rev = true,
.has_pack_half_2x16_rtz = true,
.has_fmulz = true,
.vectorize_vec2_16bit = true,
/* nir_lower_int64() isn't actually called for the LLVM backend,
* but this helps the loop unrolling heuristics. */
- .lower_int64_options = nir_lower_imul64 | nir_lower_imul_high64 | nir_lower_imul_2x32_64 |
- nir_lower_divmod64 | nir_lower_minmax64 | nir_lower_iabs64 |
- nir_lower_iadd_sat64,
+ .lower_int64_options = nir_lower_imul64 | nir_lower_imul_high64 | nir_lower_imul_2x32_64 | nir_lower_divmod64 |
+ nir_lower_minmax64 | nir_lower_iabs64 | nir_lower_iadd_sat64,
.lower_doubles_options = nir_lower_drcp | nir_lower_dsqrt | nir_lower_drsq | nir_lower_ddiv,
.divergence_analysis_options = nir_divergence_view_index_uniform,
};
if (!(device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS))
return false;
- if ((is_meta_shader(nir) || meta_shader) &&
- !(device->instance->debug_flags & RADV_DEBUG_DUMP_META_SHADERS))
+ if ((is_meta_shader(nir) || meta_shader) && !(device->instance->debug_flags & RADV_DEBUG_DUMP_META_SHADERS))
return false;
return true;
NIR_PASS(progress, shader, nir_opt_remove_phis);
NIR_PASS(progress, shader, nir_opt_dce);
}
- NIR_PASS(progress, shader, nir_opt_if,
- nir_opt_if_aggressive_last_continue | nir_opt_if_optimize_phi_true_false);
+ NIR_PASS(progress, shader, nir_opt_if, nir_opt_if_aggressive_last_continue | nir_opt_if_optimize_phi_true_false);
NIR_PASS(progress, shader, nir_opt_dead_cf);
NIR_PASS(progress, shader, nir_opt_cse);
NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, true);
} while (progress && !optimize_conservatively);
NIR_PASS(progress, shader, nir_opt_shrink_vectors);
- NIR_PASS(progress, shader, nir_remove_dead_variables,
- nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, NULL);
+ NIR_PASS(progress, shader, nir_remove_dead_variables, nir_var_function_temp | nir_var_shader_in | nir_var_shader_out,
+ NULL);
- if (shader->info.stage == MESA_SHADER_FRAGMENT &&
- (shader->info.fs.uses_discard || shader->info.fs.uses_demote)) {
+ if (shader->info.stage == MESA_SHADER_FRAGMENT && (shader->info.fs.uses_discard || shader->info.fs.uses_demote)) {
NIR_PASS(progress, shader, nir_opt_conditional_discard);
NIR_PASS(progress, shader, nir_opt_move_discards_to_top);
}
};
static void
-radv_spirv_nir_debug(void *private_data, enum nir_spirv_debug_level level, size_t spirv_offset,
- const char *message)
+radv_spirv_nir_debug(void *private_data, enum nir_spirv_debug_level level, size_t spirv_offset, const char *message)
{
struct radv_shader_debug_data *debug_data = private_data;
struct radv_instance *instance = debug_data->device->instance;
/* VK_DEBUG_REPORT_DEBUG_BIT_EXT specifies diagnostic information
* from the implementation and layers.
*/
- vk_debug_report(&instance->vk, vk_flags[level] | VK_DEBUG_REPORT_DEBUG_BIT_EXT,
- NULL, 0, 0, "radv", message);
+ vk_debug_report(&instance->vk, vk_flags[level] | VK_DEBUG_REPORT_DEBUG_BIT_EXT, NULL, 0, 0, "radv", message);
}
static bool
if (var->data.mode != nir_var_shader_out)
return true;
- return !var->data.explicit_xfb_buffer &&
- !var->data.explicit_xfb_stride;
+ return !var->data.explicit_xfb_buffer && !var->data.explicit_xfb_stride;
}
nir_shader *
assert(stage->spirv.size % 4 == 0);
bool dump_meta = device->instance->debug_flags & RADV_DEBUG_DUMP_META_SHADERS;
- if ((device->instance->debug_flags & RADV_DEBUG_DUMP_SPIRV) &&
- (!is_internal || dump_meta))
+ if ((device->instance->debug_flags & RADV_DEBUG_DUMP_SPIRV) && (!is_internal || dump_meta))
radv_print_spirv(stage->spirv.data, stage->spirv.size, stderr);
uint32_t num_spec_entries = 0;
- struct nir_spirv_specialization *spec_entries =
- vk_spec_info_to_nir_spirv(stage->spec_info, &num_spec_entries);
+ struct nir_spirv_specialization *spec_entries = vk_spec_info_to_nir_spirv(stage->spec_info, &num_spec_entries);
struct radv_shader_debug_data spirv_debug_data = {
.device = device,
.object = stage->spirv.object,
},
.force_tex_non_uniform = key->tex_non_uniform,
};
- nir = spirv_to_nir(spirv, stage->spirv.size / 4, spec_entries, num_spec_entries, stage->stage,
- stage->entrypoint, &spirv_options,
- &device->physical_device->nir_options[stage->stage]);
+ nir = spirv_to_nir(spirv, stage->spirv.size / 4, spec_entries, num_spec_entries, stage->stage, stage->entrypoint,
+ &spirv_options, &device->physical_device->nir_options[stage->stage]);
nir->info.internal |= is_internal;
assert(nir->info.stage == stage->stage);
nir_validate_shader(nir, "after spirv_to_nir");
NIR_PASS(_, nir, nir_opt_deref);
/* Pick off the single entrypoint that we want */
- foreach_list_typed_safe(nir_function, func, node, &nir->functions)
- {
+ foreach_list_typed_safe (nir_function, func, node, &nir->functions) {
if (func->is_entrypoint)
func->name = ralloc_strdup(func, "main");
else
.can_remove_var = is_not_xfb_output,
};
NIR_PASS(_, nir, nir_remove_dead_variables,
- nir_var_shader_in | nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
- &dead_vars_opts);
+ nir_var_shader_in | nir_var_shader_out | nir_var_system_value | nir_var_mem_shared, &dead_vars_opts);
/* Variables can make nir_propagate_invariant more conservative
* than it needs to be.
NIR_PASS(_, nir, nir_lower_clip_cull_distance_arrays);
- if (nir->info.stage == MESA_SHADER_VERTEX ||
- nir->info.stage == MESA_SHADER_TESS_EVAL ||
+ if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL ||
nir->info.stage == MESA_SHADER_GEOMETRY)
NIR_PASS_V(nir, nir_shader_gather_xfb_info);
*/
.lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH,
.lower_local_invocation_index = nir->info.stage == MESA_SHADER_COMPUTE &&
- ((nir->info.workgroup_size[0] == 1) +
- (nir->info.workgroup_size[1] == 1) +
+ ((nir->info.workgroup_size[0] == 1) + (nir->info.workgroup_size[1] == 1) +
(nir->info.workgroup_size[2] == 1)) == 2,
};
NIR_PASS(_, nir, nir_lower_compute_system_values, &csv_options);
if (!nir->info.shared_memory_explicit_layout)
NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, shared_var_info);
- NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
- nir_address_format_32bit_offset);
+ NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset);
}
NIR_PASS(_, nir, nir_opt_ray_queries);
*/
NIR_PASS(_, nir, nir_lower_var_copies);
- unsigned lower_flrp = (nir->options->lower_flrp16 ? 16 : 0) |
- (nir->options->lower_flrp32 ? 32 : 0) |
+ unsigned lower_flrp = (nir->options->lower_flrp16 ? 16 : 0) | (nir->options->lower_flrp32 ? 32 : 0) |
(nir->options->lower_flrp64 ? 64 : 0);
if (lower_flrp != 0) {
bool progress = false;
NIR_PASS(_, nir, radv_nir_lower_intrinsics_early, key);
/* Lower deref operations for compute shared memory. */
- if (nir->info.stage == MESA_SHADER_COMPUTE ||
- nir->info.stage == MESA_SHADER_TASK ||
+ if (nir->info.stage == MESA_SHADER_COMPUTE || nir->info.stage == MESA_SHADER_TASK ||
nir->info.stage == MESA_SHADER_MESH) {
nir_variable_mode var_modes = nir_var_mem_shared;
- if (nir->info.stage == MESA_SHADER_TASK ||
- nir->info.stage == MESA_SHADER_MESH)
+ if (nir->info.stage == MESA_SHADER_TASK || nir->info.stage == MESA_SHADER_MESH)
var_modes |= nir_var_mem_task_payload;
if (!nir->info.shared_memory_explicit_layout)
NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, var_modes, shared_var_info);
else if (var_modes & ~nir_var_mem_shared)
- NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, var_modes & ~nir_var_mem_shared,
- shared_var_info);
+ NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, var_modes & ~nir_var_mem_shared, shared_var_info);
NIR_PASS(_, nir, nir_lower_explicit_io, var_modes, nir_address_format_32bit_offset);
if (nir->info.zero_initialize_shared_memory && nir->info.shared_size > 0) {
}
}
- NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global | nir_var_mem_constant,
- nir_address_format_64bit_global);
+ NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global | nir_var_mem_constant, nir_address_format_64bit_global);
/* Lower large variables that are always constant with load_constant
* intrinsics, which get turned into PC-relative loads from a data
NIR_PASS(_, nir, nir_opt_large_constants, glsl_get_natural_size_align_bytes, 16);
/* Lower primitive shading rate to match HW requirements. */
- if ((nir->info.stage == MESA_SHADER_VERTEX ||
- nir->info.stage == MESA_SHADER_GEOMETRY ||
+ if ((nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_GEOMETRY ||
nir->info.stage == MESA_SHADER_MESH) &&
nir->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE)) {
/* Lower primitive shading rate to match HW requirements. */
- NIR_PASS(_, nir, radv_nir_lower_primitive_shading_rate,
- device->physical_device->rad_info.gfx_level);
+ NIR_PASS(_, nir, radv_nir_lower_primitive_shading_rate, device->physical_device->rad_info.gfx_level);
}
/* Indirect lowering must be called after the radv_optimize_nir() loop
* bloat the instruction count of the loop and cause it to be
* considered too large for unrolling.
*/
- if (ac_nir_lower_indirect_derefs(nir, device->physical_device->rad_info.gfx_level) &&
- !key->optimisations_disabled && nir->info.stage != MESA_SHADER_COMPUTE) {
+ if (ac_nir_lower_indirect_derefs(nir, device->physical_device->rad_info.gfx_level) && !key->optimisations_disabled &&
+ nir->info.stage != MESA_SHADER_COMPUTE) {
/* Optimize the lowered code before the linking optimizations. */
radv_optimize_nir(nir, false);
}
}
static void
-setup_ngg_lds_layout(struct radv_device *device, nir_shader *nir, struct radv_shader_info *info,
- unsigned max_vtx_in)
+setup_ngg_lds_layout(struct radv_device *device, nir_shader *nir, struct radv_shader_info *info, unsigned max_vtx_in)
{
unsigned scratch_lds_base = 0;
gl_shader_stage stage = nir->info.stage;
if (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL) {
/* Get pervertex LDS usage. */
- bool uses_instanceid =
- BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
- bool uses_primitive_id =
- BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
+ bool uses_instanceid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
+ bool uses_primitive_id = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
bool streamout_enabled = nir->xfb_info && device->physical_device->use_ngg_streamout;
- unsigned pervertex_lds_bytes =
- ac_ngg_nogs_get_pervertex_lds_size(stage,
- nir->num_outputs,
- streamout_enabled,
- info->outinfo.export_prim_id,
- false, /* user edge flag */
- info->has_ngg_culling,
- uses_instanceid,
- uses_primitive_id);
+ unsigned pervertex_lds_bytes = ac_ngg_nogs_get_pervertex_lds_size(
+ stage, nir->num_outputs, streamout_enabled, info->outinfo.export_prim_id, false, /* user edge flag */
+ info->has_ngg_culling, uses_instanceid, uses_primitive_id);
unsigned total_es_lds_bytes = pervertex_lds_bytes * max_vtx_in;
scratch_lds_base = ALIGN(total_es_lds_bytes, 8u);
} else if (stage == MESA_SHADER_GEOMETRY) {
unsigned esgs_ring_lds_bytes = info->ngg_info.esgs_ring_size;
unsigned gs_total_out_vtx_bytes = info->ngg_info.ngg_emit_size * 4u;
- scratch_lds_base =
- ALIGN(esgs_ring_lds_bytes + gs_total_out_vtx_bytes, 8u /* for the repacking code */);
+ scratch_lds_base = ALIGN(esgs_ring_lds_bytes + gs_total_out_vtx_bytes, 8u /* for the repacking code */);
} else {
/* not handled here */
return;
}
/* Get scratch LDS usage. */
- unsigned scratch_lds_size =
- ac_ngg_get_scratch_lds_size(stage,
- info->workgroup_size,
- info->wave_size,
- device->physical_device->use_ngg_streamout,
- info->has_ngg_culling);
+ unsigned scratch_lds_size = ac_ngg_get_scratch_lds_size(
+ stage, info->workgroup_size, info->wave_size, device->physical_device->use_ngg_streamout, info->has_ngg_culling);
/* Get total LDS usage. */
nir->info.shared_size = scratch_lds_base + scratch_lds_size;
info->ngg_info.scratch_lds_base = scratch_lds_base;
}
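/* Editor's sketch (not driver code): the LDS budget computed by
 * setup_ngg_lds_layout above is per-vertex bytes times input vertices,
 * aligned to 8, with the scratch area stacked on top. The per-vertex and
 * scratch sizes come from ac_* helpers in the real driver; the constants
 * below are illustrative assumptions only. */
#include <stdio.h>

#define ALIGN_POT(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
   unsigned pervertex_lds_bytes = 20; /* assumed ac_ngg_nogs_get_pervertex_lds_size() result */
   unsigned max_vtx_in = 255;         /* max input vertices of the NGG workgroup */
   unsigned scratch_lds_size = 96;    /* assumed ac_ngg_get_scratch_lds_size() result */

   unsigned scratch_lds_base = ALIGN_POT(pervertex_lds_bytes * max_vtx_in, 8u);
   unsigned shared_size = scratch_lds_base + scratch_lds_size;
   printf("scratch base %u, total LDS %u bytes\n", scratch_lds_base, shared_size); /* 5104, 5200 */
   return 0;
}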
-void radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_stage,
- const struct radv_pipeline_key *pl_key)
+void
+radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_stage,
+ const struct radv_pipeline_key *pl_key)
{
const struct radv_shader_info *info = &ngg_stage->info;
nir_shader *nir = ngg_stage->nir;
- assert(nir->info.stage == MESA_SHADER_VERTEX ||
- nir->info.stage == MESA_SHADER_TESS_EVAL ||
- nir->info.stage == MESA_SHADER_GEOMETRY ||
- nir->info.stage == MESA_SHADER_MESH);
+ assert(nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL ||
+ nir->info.stage == MESA_SHADER_GEOMETRY || nir->info.stage == MESA_SHADER_MESH);
const struct gfx10_ngg_info *ngg_info = &info->ngg_info;
unsigned num_vertices_per_prim = 3;
options.has_xfb_prim_query = info->has_ngg_xfb_query;
options.force_vrs = info->force_vrs_per_vertex;
- if (nir->info.stage == MESA_SHADER_VERTEX ||
- nir->info.stage == MESA_SHADER_TESS_EVAL) {
+ if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) {
assert(info->is_ngg);
if (info->has_ngg_culling)
NIR_PASS_V(nir, ac_nir_lower_ngg_gs, &options);
} else if (nir->info.stage == MESA_SHADER_MESH) {
bool scratch_ring = false;
- NIR_PASS_V(nir, ac_nir_lower_ngg_ms,
- options.gfx_level,
- options.clipdist_enable_mask,
- options.vs_output_param_offset,
- options.has_param_exports,
- &scratch_ring,
- info->wave_size,
+ NIR_PASS_V(nir, ac_nir_lower_ngg_ms, options.gfx_level, options.clipdist_enable_mask,
+ options.vs_output_param_offset, options.has_param_exports, &scratch_ring, info->wave_size,
pl_key->has_multiview_view_index);
ngg_stage->info.ms.needs_ms_scratch_ring = scratch_ring;
} else {
get_size_class(unsigned size, bool round_up)
{
size = round_up ? util_logbase2_ceil(size) : util_logbase2(size);
- unsigned size_class =
- MAX2(size, RADV_SHADER_ALLOC_MIN_SIZE_CLASS) - RADV_SHADER_ALLOC_MIN_SIZE_CLASS;
+ unsigned size_class = MAX2(size, RADV_SHADER_ALLOC_MIN_SIZE_CLASS) - RADV_SHADER_ALLOC_MIN_SIZE_CLASS;
return MIN2(size_class, RADV_SHADER_ALLOC_NUM_FREE_LISTS - 1);
}
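/* Editor's sketch (not driver code): how get_size_class above maps an
 * allocation size to a power-of-two free list. The MIN/NUM constants are
 * illustrative assumptions, not values taken from this diff. A 4 KiB request
 * rounds up to log2 = 12 and lands in class 12 - 8 = 4; anything at or above
 * the top class clamps to the last list. */
#include <stdio.h>

static unsigned log2_ceil(unsigned v) /* smallest n with (1 << n) >= v */
{
   unsigned n = 0;
   while ((1u << n) < v)
      n++;
   return n;
}

int main(void)
{
   const unsigned min_size_class = 8, num_free_lists = 8; /* assumed */
   for (unsigned size = 256; size <= 65536; size *= 4) {
      unsigned cls = log2_ceil(size); /* the round_up == true path */
      cls = (cls > min_size_class ? cls : min_size_class) - min_size_class;
      if (cls > num_free_lists - 1)
         cls = num_free_lists - 1;
      printf("size %6u -> free list %u\n", size, cls); /* classes 0, 2, 4, 6, 7 */
   }
   return 0;
}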
.semaphoreCount = 1,
.pValues = &seq,
};
- return device->vk.dispatch_table.WaitSemaphores(radv_device_to_handle(device), &wait_info,
- UINT64_MAX);
+ return device->vk.dispatch_table.WaitSemaphores(radv_device_to_handle(device), &wait_info, UINT64_MAX);
}
/* Segregated fit allocator, implementing a good-fit allocation policy.
* at the first one available.
*/
unsigned free_list_mask = BITFIELD_MASK(RADV_SHADER_ALLOC_NUM_FREE_LISTS);
- unsigned size_class =
- ffs(device->shader_free_list_mask & (free_list_mask << get_size_class(size, true)));
+ unsigned size_class = ffs(device->shader_free_list_mask & (free_list_mask << get_size_class(size, true)));
if (size_class) {
size_class--;
- list_for_each_entry(union radv_shader_arena_block, hole,
- &device->shader_free_lists[size_class], freelist)
- {
+ list_for_each_entry (union radv_shader_arena_block, hole, &device->shader_free_lists[size_class], freelist) {
if (hole->size < size)
continue;
goto fail;
unsigned arena_size =
- MAX2(RADV_SHADER_ALLOC_MIN_ARENA_SIZE
- << MIN2(RADV_SHADER_ALLOC_MAX_ARENA_SIZE_SHIFT, device->shader_arena_shift),
+ MAX2(RADV_SHADER_ALLOC_MIN_ARENA_SIZE << MIN2(RADV_SHADER_ALLOC_MAX_ARENA_SIZE_SHIFT, device->shader_arena_shift),
size);
enum radeon_bo_flag flags = RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_32BIT;
if (device->shader_use_invisible_vram)
flags |= RADEON_FLAG_NO_CPU_ACCESS;
else
- flags |=
- (device->physical_device->rad_info.cpdma_prefetch_writes_memory ? 0
- : RADEON_FLAG_READ_ONLY);
+ flags |= (device->physical_device->rad_info.cpdma_prefetch_writes_memory ? 0 : RADEON_FLAG_READ_ONLY);
VkResult result;
- result =
- device->ws->buffer_create(device->ws, arena_size, RADV_SHADER_ALLOC_ALIGNMENT,
- RADEON_DOMAIN_VRAM, flags, RADV_BO_PRIORITY_SHADER, 0, &arena->bo);
+ result = device->ws->buffer_create(device->ws, arena_size, RADV_SHADER_ALLOC_ALIGNMENT, RADEON_DOMAIN_VRAM, flags,
+ RADV_BO_PRIORITY_SHADER, 0, &arena->bo);
if (result != VK_SUCCESS)
goto fail;
radv_rmv_log_bo_allocate(device, arena->bo, arena_size, true);
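/* Editor's sketch (not driver code): the free-list probe above keeps one bit
 * per non-empty free list in shader_free_list_mask; shifting a full mask by
 * the request's size class and taking ffs() finds the smallest list that can
 * satisfy it (good fit), or 0 when a new arena is needed. Values below are
 * made up for illustration. */
#include <stdio.h>
#include <strings.h> /* ffs() */

int main(void)
{
   unsigned num_free_lists = 8;               /* assumed */
   unsigned nonempty = (1u << 2) | (1u << 5); /* lists 2 and 5 hold holes */
   unsigned wanted_class = 3;                 /* request's size class */

   unsigned mask = ((1u << num_free_lists) - 1) << wanted_class;
   int hit = ffs(nonempty & mask); /* 1-based index of lowest set bit, 0 if none */
   if (hit)
      printf("allocate from free list %d\n", hit - 1); /* prints 5 */
   else
      printf("no hole large enough, grow an arena\n");
   return 0;
}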
void
radv_destroy_shader_arenas(struct radv_device *device)
{
- list_for_each_entry_safe(union radv_shader_arena_block, block, &device->shader_block_obj_pool,
- pool) free(block);
+ list_for_each_entry_safe (union radv_shader_arena_block, block, &device->shader_block_obj_pool, pool)
+ free(block);
- list_for_each_entry_safe(struct radv_shader_arena, arena, &device->shader_arenas, list)
- {
+ list_for_each_entry_safe (struct radv_shader_arena, arena, &device->shader_arenas, list) {
radv_rmv_log_bo_destroy(device, arena->bo);
device->ws->buffer_destroy(device->ws, arena->bo);
free(arena);
if (device->shader_upload_sem)
disp->DestroySemaphore(radv_device_to_handle(device), device->shader_upload_sem, NULL);
- list_for_each_entry_safe(struct radv_shader_dma_submission, submission,
- &device->shader_dma_submissions, list)
- {
+ list_for_each_entry_safe (struct radv_shader_dma_submission, submission, &device->shader_dma_submissions, list) {
if (submission->cs)
ws->cs_destroy(submission->cs);
if (submission->bo)
}
static bool
-radv_should_use_wgp_mode(const struct radv_device *device, gl_shader_stage stage,
- const struct radv_shader_info *info)
+radv_should_use_wgp_mode(const struct radv_device *device, gl_shader_stage stage, const struct radv_shader_info *info)
{
enum amd_gfx_level chip = device->physical_device->rad_info.gfx_level;
switch (stage) {
}
if (!pdevice->use_ngg_streamout) {
- config->rsrc2 |=
- S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) | S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) |
- S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) | S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) |
- S_00B12C_SO_EN(!!info->so.num_outputs);
+ config->rsrc2 |= S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) | S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) |
+ S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) | S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) |
+ S_00B12C_SO_EN(!!info->so.num_outputs);
}
- config->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) / (info->wave_size == 32 ? 8 : 4)) |
- S_00B848_DX10_CLAMP(1) | S_00B848_FLOAT_MODE(config->float_mode);
+ config->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) / (info->wave_size == 32 ? 8 : 4)) | S_00B848_DX10_CLAMP(1) |
+ S_00B848_FLOAT_MODE(config->float_mode);
if (pdevice->rad_info.gfx_level >= GFX10) {
config->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX10(args->num_user_sgprs >> 5);
} else {
config->rsrc2 |= S_00B12C_OC_LDS_EN(1) | S_00B12C_EXCP_EN(excp_en);
}
- config->rsrc1 |=
- S_00B428_MEM_ORDERED(pdevice->rad_info.gfx_level >= GFX10) | S_00B428_WGP_MODE(wgp_mode);
+ config->rsrc1 |= S_00B428_MEM_ORDERED(pdevice->rad_info.gfx_level >= GFX10) | S_00B428_WGP_MODE(wgp_mode);
config->rsrc2 |= S_00B42C_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
break;
case MESA_SHADER_VERTEX:
FALLTHROUGH;
case MESA_SHADER_COMPUTE:
case MESA_SHADER_TASK:
- config->rsrc1 |=
- S_00B848_MEM_ORDERED(pdevice->rad_info.gfx_level >= GFX10) | S_00B848_WGP_MODE(wgp_mode);
- config->rsrc2 |= S_00B84C_TGID_X_EN(info->cs.uses_block_id[0]) |
- S_00B84C_TGID_Y_EN(info->cs.uses_block_id[1]) |
+ config->rsrc1 |= S_00B848_MEM_ORDERED(pdevice->rad_info.gfx_level >= GFX10) | S_00B848_WGP_MODE(wgp_mode);
+ config->rsrc2 |= S_00B84C_TGID_X_EN(info->cs.uses_block_id[0]) | S_00B84C_TGID_Y_EN(info->cs.uses_block_id[1]) |
S_00B84C_TGID_Z_EN(info->cs.uses_block_id[2]) |
S_00B84C_TIDIG_COMP_CNT(info->cs.uses_thread_id[2] ? 2
: info->cs.uses_thread_id[1] ? 1
: 0) |
- S_00B84C_TG_SIZE_EN(info->cs.uses_local_invocation_idx) |
- S_00B84C_LDS_SIZE(config->lds_size) | S_00B84C_EXCP_EN(excp_en);
+ S_00B84C_TG_SIZE_EN(info->cs.uses_local_invocation_idx) | S_00B84C_LDS_SIZE(config->lds_size) |
+ S_00B84C_EXCP_EN(excp_en);
config->rsrc3 |= S_00B8A0_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
break;
}
if (pdevice->rad_info.gfx_level >= GFX10 && info->is_ngg &&
- (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL ||
- stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_MESH)) {
+ (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL || stage == MESA_SHADER_GEOMETRY ||
+ stage == MESA_SHADER_MESH)) {
unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt;
gl_shader_stage es_stage = stage;
if (stage == MESA_SHADER_GEOMETRY)
/* TES only needs vertex offset 2 for triangles or quads. */
if (stage == MESA_SHADER_TESS_EVAL)
- need_gs_vtx_offset2 &= info->tes._primitive_mode == TESS_PRIMITIVE_TRIANGLES ||
- info->tes._primitive_mode == TESS_PRIMITIVE_QUADS;
+ need_gs_vtx_offset2 &=
+ info->tes._primitive_mode == TESS_PRIMITIVE_TRIANGLES || info->tes._primitive_mode == TESS_PRIMITIVE_QUADS;
if (info->uses_invocation_id) {
gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID. */
- } else if (info->uses_prim_id || (es_stage == MESA_SHADER_VERTEX &&
- info->outinfo.export_prim_id)) {
+ } else if (info->uses_prim_id || (es_stage == MESA_SHADER_VERTEX && info->outinfo.export_prim_id)) {
gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */
} else if (need_gs_vtx_offset2) {
gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */
* disable exactly 1 CU per SA for GS.
*/
config->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | S_00B228_WGP_MODE(wgp_mode);
- config->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
- S_00B22C_LDS_SIZE(config->lds_size) |
+ config->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) | S_00B22C_LDS_SIZE(config->lds_size) |
S_00B22C_OC_LDS_EN(es_stage == MESA_SHADER_TESS_EVAL);
} else if (pdevice->rad_info.gfx_level >= GFX9 && stage == MESA_SHADER_GEOMETRY) {
unsigned es_type = info->gs.es_type;
}
config->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | S_00B228_WGP_MODE(wgp_mode);
- config->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
- S_00B22C_OC_LDS_EN(es_type == MESA_SHADER_TESS_EVAL);
+ config->rsrc2 |=
+ S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) | S_00B22C_OC_LDS_EN(es_type == MESA_SHADER_TESS_EVAL);
} else if (pdevice->rad_info.gfx_level >= GFX9 && stage == MESA_SHADER_TESS_CTRL) {
config->rsrc1 |= S_00B428_LS_VGPR_COMP_CNT(vgpr_comp_cnt);
} else {
}
static VkResult
-radv_shader_dma_resize_upload_buf(struct radv_shader_dma_submission *submission,
- struct radeon_winsys *ws, uint64_t size)
+radv_shader_dma_resize_upload_buf(struct radv_shader_dma_submission *submission, struct radeon_winsys *ws,
+ uint64_t size)
{
if (submission->bo)
ws->buffer_destroy(ws, submission->bo);
- VkResult result =
- ws->buffer_create(ws, size, RADV_SHADER_ALLOC_ALIGNMENT, RADEON_DOMAIN_GTT,
- RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC,
- RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &submission->bo);
+ VkResult result = ws->buffer_create(
+ ws, size, RADV_SHADER_ALLOC_ALIGNMENT, RADEON_DOMAIN_GTT,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC,
+ RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &submission->bo);
if (result != VK_SUCCESS)
return result;
while (list_is_empty(&device->shader_dma_submissions))
cnd_wait(&device->shader_dma_submission_list_cond, &device->shader_dma_submission_list_mutex);
- submission =
- list_first_entry(&device->shader_dma_submissions, struct radv_shader_dma_submission, list);
+ submission = list_first_entry(&device->shader_dma_submissions, struct radv_shader_dma_submission, list);
list_del(&submission->list);
mtx_unlock(&device->shader_dma_submission_list_mutex);
}
void
-radv_shader_dma_push_submission(struct radv_device *device,
- struct radv_shader_dma_submission *submission, uint64_t seq)
+radv_shader_dma_push_submission(struct radv_device *device, struct radv_shader_dma_submission *submission, uint64_t seq)
{
submission->seq = seq;
}
struct radv_shader_dma_submission *
-radv_shader_dma_get_submission(struct radv_device *device, struct radeon_winsys_bo *bo, uint64_t va,
- uint64_t size)
+radv_shader_dma_get_submission(struct radv_device *device, struct radeon_winsys_bo *bo, uint64_t va, uint64_t size)
{
struct radv_shader_dma_submission *submission = radv_shader_dma_pop_submission(device);
struct radeon_cmdbuf *cs = submission->cs;
};
result = ws->cs_submit(device->shader_upload_hw_ctx, &submit, 0, NULL, 1, &signal_info);
- if (result != VK_SUCCESS)
- {
+ if (result != VK_SUCCESS) {
mtx_unlock(&device->shader_upload_hw_ctx_mutex);
radv_shader_dma_push_submission(device, submission, 0);
return false;
return true;
}
-
struct radv_shader *
radv_shader_create(struct radv_device *device, const struct radv_shader_binary *binary)
{
if (!shader)
return NULL;
- vk_pipeline_cache_object_init(&device->vk, &shader->base, &radv_shader_ops, shader->sha1,
- SHA1_DIGEST_LENGTH);
+ vk_pipeline_cache_object_init(&device->vk, &shader->base, &radv_shader_ops, shader->sha1, SHA1_DIGEST_LENGTH);
shader->info = binary->info;
if (device->shader_use_invisible_vram) {
uint64_t va = radv_buffer_get_va(shader_part->alloc->arena->bo) + shader_part->alloc->offset;
- submission =
- radv_shader_dma_get_submission(device, shader_part->alloc->arena->bo, va, code_size);
+ submission = radv_shader_dma_get_submission(device, shader_part->alloc->arena->bo, va, code_size);
if (!submission)
return false;
}
struct radv_shader_part *
-radv_shader_part_create(struct radv_device *device, struct radv_shader_part_binary *binary,
- unsigned wave_size)
+radv_shader_part_create(struct radv_device *device, struct radv_shader_part_binary *binary, unsigned wave_size)
{
uint32_t code_size = radv_get_shader_binary_size(binary->code_size);
struct radv_shader_part *shader_part;
shader_part->ref_count = 1;
shader_part->code_size = code_size;
- shader_part->rsrc1 = S_00B848_VGPRS((binary->num_vgprs - 1) / (wave_size == 32 ? 8 : 4)) |
- S_00B228_SGPRS((binary->num_sgprs - 1) / 8);
- shader_part->disasm_string =
- binary->disasm_size ? strdup((const char *)(binary->data + binary->code_size)) : NULL;
+ shader_part->rsrc1 =
+ S_00B848_VGPRS((binary->num_vgprs - 1) / (wave_size == 32 ? 8 : 4)) | S_00B228_SGPRS((binary->num_sgprs - 1) / 8);
+ shader_part->disasm_string = binary->disasm_size ? strdup((const char *)(binary->data + binary->code_size)) : NULL;
shader_part->spi_shader_col_format = binary->info.spi_shader_col_format;
}
static void
-radv_aco_build_shader_binary(void **bin, const struct ac_shader_config *config,
- const char *llvm_ir_str, unsigned llvm_ir_size, const char *disasm_str,
- unsigned disasm_size, uint32_t *statistics, uint32_t stats_size,
- uint32_t exec_size, const uint32_t *code, uint32_t code_dw,
+radv_aco_build_shader_binary(void **bin, const struct ac_shader_config *config, const char *llvm_ir_str,
+ unsigned llvm_ir_size, const char *disasm_str, unsigned disasm_size, uint32_t *statistics,
+ uint32_t stats_size, uint32_t exec_size, const uint32_t *code, uint32_t code_dw,
const struct aco_symbol *symbols, unsigned num_symbols)
{
struct radv_shader_binary **binary = (struct radv_shader_binary **)bin;
memcpy(legacy_binary->data, statistics, stats_size);
legacy_binary->stats_size = stats_size;
- memcpy(legacy_binary->data + legacy_binary->stats_size, code,
- code_dw * sizeof(uint32_t));
+ memcpy(legacy_binary->data + legacy_binary->stats_size, code, code_dw * sizeof(uint32_t));
legacy_binary->exec_size = exec_size;
legacy_binary->code_size = code_dw * sizeof(uint32_t);
legacy_binary->disasm_size = 0;
legacy_binary->ir_size = llvm_ir_size;
- memcpy((char*)legacy_binary->data + legacy_binary->stats_size + legacy_binary->code_size,
- llvm_ir_str, llvm_ir_size);
+ memcpy((char *)legacy_binary->data + legacy_binary->stats_size + legacy_binary->code_size, llvm_ir_str,
+ llvm_ir_size);
legacy_binary->disasm_size = disasm_size;
if (disasm_size) {
- memcpy((char*)legacy_binary->data + legacy_binary->stats_size +
- legacy_binary->code_size + llvm_ir_size, disasm_str,
- disasm_size);
+ memcpy((char *)legacy_binary->data + legacy_binary->stats_size + legacy_binary->code_size + llvm_ir_size,
+ disasm_str, disasm_size);
}
- *binary = (struct radv_shader_binary*)legacy_binary;
+ *binary = (struct radv_shader_binary *)legacy_binary;
}
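/* Editor's sketch (not driver code): the memcpy offsets above imply the
 * legacy binary payload layout data = [stats | code | ir | disasm], each
 * section starting at the sum of the preceding sizes; the string sections
 * are read back with strdup() later, hence the terminators. All sizes here
 * are illustrative. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
   const char ir[] = "llvm-ir...", disasm[] = "s_endpgm";
   unsigned stats_size = 8, code_size = 16;
   unsigned ir_size = sizeof(ir), disasm_size = sizeof(disasm); /* include '\0' here */

   unsigned char *data = calloc(stats_size + code_size + ir_size + disasm_size, 1);
   memcpy(data + stats_size + code_size, ir, ir_size);
   memcpy(data + stats_size + code_size + ir_size, disasm, disasm_size);

   printf("ir at +%u: %s\n", stats_size + code_size, (char *)data + stats_size + code_size);
   printf("disasm at +%u: %s\n", stats_size + code_size + ir_size,
          (char *)data + stats_size + code_size + ir_size);
   free(data);
   return 0;
}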
static void
-radv_fill_nir_compiler_options(struct radv_nir_compiler_options *options,
- struct radv_device *device, const struct radv_pipeline_key *key,
- bool should_use_wgp, bool can_dump_shader, bool is_meta_shader,
- bool keep_shader_info, bool keep_statistic_info)
+radv_fill_nir_compiler_options(struct radv_nir_compiler_options *options, struct radv_device *device,
+ const struct radv_pipeline_key *key, bool should_use_wgp, bool can_dump_shader,
+ bool is_meta_shader, bool keep_shader_info, bool keep_statistic_info)
{
if (key)
options->key = *key;
options->wgp_mode = should_use_wgp;
options->info = &device->physical_device->rad_info;
options->dump_shader = can_dump_shader;
- options->dump_preoptir =
- options->dump_shader && device->instance->debug_flags & RADV_DEBUG_PREOPTIR;
+ options->dump_preoptir = options->dump_shader && device->instance->debug_flags & RADV_DEBUG_PREOPTIR;
options->record_ir = keep_shader_info;
options->record_stats = keep_statistic_info;
options->check_ir = device->instance->debug_flags & RADV_DEBUG_CHECKIR;
const char *disasm_data;
size_t disasm_size;
- if (!ac_rtld_get_section_by_name(&rtld_binary, ".AMDGPU.disasm", &disasm_data,
- &disasm_size)) {
+ if (!ac_rtld_get_section_by_name(&rtld_binary, ".AMDGPU.disasm", &disasm_data, &disasm_size)) {
return;
}
- shader->ir_string =
- bin->llvm_ir_size ? strdup((const char *)(bin->data + bin->elf_size)) : NULL;
+ shader->ir_string = bin->llvm_ir_size ? strdup((const char *)(bin->data + bin->elf_size)) : NULL;
shader->disasm_string = malloc(disasm_size + 1);
memcpy(shader->disasm_string, disasm_data, disasm_size);
shader->disasm_string[disasm_size] = 0;
} else {
struct radv_shader_binary_legacy *bin = (struct radv_shader_binary_legacy *)binary;
- shader->ir_string =
- bin->ir_size ? strdup((const char *)(bin->data + bin->stats_size + bin->code_size)) : NULL;
+ shader->ir_string = bin->ir_size ? strdup((const char *)(bin->data + bin->stats_size + bin->code_size)) : NULL;
shader->disasm_string =
- bin->disasm_size
- ? strdup((const char *)(bin->data + bin->stats_size + bin->code_size + bin->ir_size))
- : NULL;
+ bin->disasm_size ? strdup((const char *)(bin->data + bin->stats_size + bin->code_size + bin->ir_size)) : NULL;
}
}
static struct radv_shader_binary *
-shader_compile(struct radv_device *device, struct nir_shader *const *shaders, int shader_count,
- gl_shader_stage stage, const struct radv_shader_info *info,
- const struct radv_shader_args *args, struct radv_nir_compiler_options *options)
+shader_compile(struct radv_device *device, struct nir_shader *const *shaders, int shader_count, gl_shader_stage stage,
+ const struct radv_shader_info *info, const struct radv_shader_args *args,
+ struct radv_nir_compiler_options *options)
{
struct radv_shader_debug_data debug_data = {
.device = device,
struct aco_compiler_options ac_opts;
radv_aco_convert_opts(&ac_opts, options, args);
radv_aco_convert_shader_info(&ac_info, info, args, &options->key);
- aco_compile_shader(&ac_opts, &ac_info, shader_count, shaders, &args->ac, &radv_aco_build_shader_binary, (void **)&binary);
+ aco_compile_shader(&ac_opts, &ac_info, shader_count, shaders, &args->ac, &radv_aco_build_shader_binary,
+ (void **)&binary);
}
binary->info = *info;
struct radv_shader *
radv_shader_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
- struct radv_pipeline_stage *pl_stage, struct nir_shader *const *shaders,
- int shader_count, const struct radv_pipeline_key *key, bool keep_shader_info,
- bool keep_statistic_info, struct radv_shader_binary **binary_out)
+ struct radv_pipeline_stage *pl_stage, struct nir_shader *const *shaders, int shader_count,
+ const struct radv_pipeline_key *key, bool keep_shader_info, bool keep_statistic_info,
+ struct radv_shader_binary **binary_out)
{
gl_shader_stage stage = shaders[shader_count - 1]->info.stage;
struct radv_shader_info *info = &pl_stage->info;
struct radv_nir_compiler_options options = {0};
- radv_fill_nir_compiler_options(
- &options, device, key, radv_should_use_wgp_mode(device, stage, info),
- radv_can_dump_shader(device, shaders[0], false), is_meta_shader(shaders[0]), keep_shader_info,
- keep_statistic_info);
+ radv_fill_nir_compiler_options(&options, device, key, radv_should_use_wgp_mode(device, stage, info),
+ radv_can_dump_shader(device, shaders[0], false), is_meta_shader(shaders[0]),
+ keep_shader_info, keep_statistic_info);
struct radv_shader_binary *binary =
shader_compile(device, shaders, shader_count, stage, info, &pl_stage->args, &options);
struct radv_shader_info info = {0};
struct radv_pipeline_key key = {0};
struct radv_nir_compiler_options options = {0};
- radv_fill_nir_compiler_options(&options, device, &key,
- radv_should_use_wgp_mode(device, stage, &info), false, false,
+ radv_fill_nir_compiler_options(&options, device, &key, radv_should_use_wgp_mode(device, stage, &info), false, false,
false, false);
nir_builder b = radv_meta_init_shader(device, stage, "meta_trap_handler");
info.wave_size = 64;
struct radv_shader_args args;
- radv_declare_shader_args(device, &key, &info, stage, MESA_SHADER_NONE,
- RADV_SHADER_TYPE_TRAP_HANDLER, &args);
+ radv_declare_shader_args(device, &key, &info, stage, MESA_SHADER_NONE, RADV_SHADER_TYPE_TRAP_HANDLER, &args);
- struct radv_shader_binary *binary =
- shader_compile(device, &b.shader, 1, stage, &info, &args, &options);
+ struct radv_shader_binary *binary = shader_compile(device, &b.shader, 1, stage, &info, &args, &options);
struct radv_shader *shader = radv_shader_create(device, binary);
ralloc_free(b.shader);
return shader;
}
-static void radv_aco_build_shader_part(void **bin,
- uint32_t num_sgprs,
- uint32_t num_vgprs,
- const uint32_t *code,
- uint32_t code_size,
- const char *disasm_str,
- uint32_t disasm_size)
+static void
+radv_aco_build_shader_part(void **bin, uint32_t num_sgprs, uint32_t num_vgprs, const uint32_t *code, uint32_t code_size,
+ const char *disasm_str, uint32_t disasm_size)
{
struct radv_shader_part_binary **binary = (struct radv_shader_part_binary **)bin;
size_t size = code_size * sizeof(uint32_t) + sizeof(struct radv_shader_part_binary);
part_binary->code_size = code_size * sizeof(uint32_t);
memcpy(part_binary->data, code, part_binary->code_size);
if (disasm_size) {
- memcpy((char*)part_binary->data + part_binary->code_size,
- disasm_str, disasm_size);
+ memcpy((char *)part_binary->data + part_binary->code_size, disasm_str, disasm_size);
part_binary->disasm_size = disasm_size;
}
struct aco_compiler_options ac_opts;
radv_aco_convert_shader_info(&ac_info, &info, &in_args, &options.key);
radv_aco_convert_opts(&ac_opts, &options, &in_args);
- aco_compile_rt_prolog(&ac_opts, &ac_info, &in_args.ac, &out_args.ac,
- &radv_aco_build_shader_binary, (void **)&binary);
+ aco_compile_rt_prolog(&ac_opts, &ac_info, &in_args.ac, &out_args.ac, &radv_aco_build_shader_binary,
+ (void **)&binary);
binary->info = info;
radv_postprocess_binary_config(device, binary, &in_args);
struct radv_pipeline_key pipeline_key = {0};
- radv_declare_shader_args(
- device, &pipeline_key, &info, key->next_stage,
- key->next_stage != MESA_SHADER_VERTEX ? MESA_SHADER_VERTEX : MESA_SHADER_NONE,
- RADV_SHADER_TYPE_DEFAULT, &args);
+ radv_declare_shader_args(device, &pipeline_key, &info, key->next_stage,
+ key->next_stage != MESA_SHADER_VERTEX ? MESA_SHADER_VERTEX : MESA_SHADER_NONE,
+ RADV_SHADER_TYPE_DEFAULT, &args);
info.user_sgprs_locs = args.user_sgprs_locs;
info.inline_push_constant_mask = args.ac.inline_push_const_mask;
radv_aco_convert_shader_info(&ac_info, &info, &args, &options.key);
radv_aco_convert_opts(&ac_opts, &options, &args);
radv_aco_convert_vs_prolog_key(&ac_prolog_info, key, &args);
- aco_compile_vs_prolog(&ac_opts, &ac_info, &ac_prolog_info, &args.ac, &radv_aco_build_shader_part,
- (void **)&binary);
+ aco_compile_vs_prolog(&ac_opts, &ac_info, &ac_prolog_info, &args.ac, &radv_aco_build_shader_part, (void **)&binary);
prolog = radv_shader_part_create(device, binary, info.wave_size);
if (!prolog)
radv_aco_convert_shader_info(&ac_info, &info, &args, &options.key);
radv_aco_convert_opts(&ac_opts, &options, &args);
radv_aco_convert_ps_epilog_key(&ac_epilog_info, key, &args);
- aco_compile_ps_epilog(&ac_opts, &ac_info, &ac_epilog_info, &args.ac, &radv_aco_build_shader_part,
- (void **)&binary);
+ aco_compile_ps_epilog(&ac_opts, &ac_info, &ac_epilog_info, &args.ac, &radv_aco_build_shader_part, (void **)&binary);
binary->info.spi_shader_col_format = key->spi_shader_col_format;
radv_find_shader(struct radv_device *device, uint64_t pc)
{
mtx_lock(&device->shader_arena_mutex);
- list_for_each_entry(struct radv_shader_arena, arena, &device->shader_arenas, list)
- {
+ list_for_each_entry (struct radv_shader_arena, arena, &device->shader_arenas, list) {
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#endif
- list_for_each_entry(union radv_shader_arena_block, block, &arena->entries, list)
- {
+ list_for_each_entry (union radv_shader_arena_block, block, &arena->entries, list) {
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
if (!shader)
continue;
- if (pc >= shader->va &&
- pc < shader->va + align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT))
+ if (pc >= shader->va && pc < shader->va + align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT))
return shader;
}
}
}
unsigned
-radv_get_max_waves(const struct radv_device *device, struct radv_shader *shader,
- gl_shader_stage stage)
+radv_get_max_waves(const struct radv_device *device, struct radv_shader *shader, gl_shader_stage stage)
{
const struct radeon_info *info = &device->physical_device->rad_info;
const enum amd_gfx_level gfx_level = info->gfx_level;
max_simd_waves = info->max_wave64_per_simd * (64 / wave_size);
if (stage == MESA_SHADER_FRAGMENT) {
- lds_per_wave =
- conf->lds_size * info->lds_encode_granularity + shader->info.ps.num_interp * 48;
+ lds_per_wave = conf->lds_size * info->lds_encode_granularity + shader->info.ps.num_interp * 48;
lds_per_wave = align(lds_per_wave, info->lds_alloc_granularity);
} else if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_TASK) {
unsigned max_workgroup_size = shader->info.workgroup_size;
- lds_per_wave =
- align(conf->lds_size * info->lds_encode_granularity, info->lds_alloc_granularity);
+ lds_per_wave = align(conf->lds_size * info->lds_encode_granularity, info->lds_alloc_granularity);
lds_per_wave /= DIV_ROUND_UP(max_workgroup_size, wave_size);
}
}
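/* Editor's sketch (not driver code): the compute-shader branch above turns
 * the rsrc2 LDS_SIZE field into bytes and divides by the waves per workgroup
 * to get LDS pressure per wave. The 512-byte granularities are an assumption
 * for illustration, not read from this diff. */
#include <stdio.h>

#define ALIGN_POT(x, a) (((x) + (a) - 1) & ~((a) - 1))
#define DIV_ROUND_UP(a, b) (((a) + (b) - 1) / (b))

int main(void)
{
   unsigned lds_size = 16;                       /* conf->lds_size, in HW blocks */
   unsigned encode_gran = 512, alloc_gran = 512; /* assumed granularities */
   unsigned workgroup_size = 256, wave_size = 64;

   unsigned lds_per_wave = ALIGN_POT(lds_size * encode_gran, alloc_gran);
   lds_per_wave /= DIV_ROUND_UP(workgroup_size, wave_size);
   printf("%u LDS bytes per wave\n", lds_per_wave); /* 8192 / 4 = 2048 */
   return 0;
}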
unsigned
-radv_compute_spi_ps_input(const struct radv_pipeline_key *pipeline_key,
- const struct radv_shader_info *info)
+radv_compute_spi_ps_input(const struct radv_pipeline_key *pipeline_key, const struct radv_shader_info *info)
{
unsigned spi_ps_input;
S_0286CC_PERSP_SAMPLE_ENA(info->ps.reads_persp_sample) |
S_0286CC_LINEAR_CENTER_ENA(info->ps.reads_linear_center) |
S_0286CC_LINEAR_CENTROID_ENA(info->ps.reads_linear_centroid) |
- S_0286CC_LINEAR_SAMPLE_ENA(info->ps.reads_linear_sample)|
+ S_0286CC_LINEAR_SAMPLE_ENA(info->ps.reads_linear_sample) |
S_0286CC_PERSP_PULL_MODEL_ENA(info->ps.reads_barycentric_model) |
S_0286CC_FRONT_FACE_ENA(info->ps.reads_front_face);
- if (info->ps.reads_frag_coord_mask ||
- info->ps.reads_sample_pos_mask) {
+ if (info->ps.reads_frag_coord_mask || info->ps.reads_sample_pos_mask) {
uint8_t mask = info->ps.reads_frag_coord_mask | info->ps.reads_sample_pos_mask;
for (unsigned i = 0; i < 4; i++) {
}
VkResult
-radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipeline,
- struct radv_shader *shader, gl_shader_stage stage, FILE *output)
+radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipeline, struct radv_shader *shader,
+ gl_shader_stage stage, FILE *output)
{
VkPipelineExecutablePropertiesKHR *props = NULL;
uint32_t prop_count = 0;
pipeline_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR;
pipeline_info.pipeline = radv_pipeline_to_handle(pipeline);
- result = radv_GetPipelineExecutablePropertiesKHR(radv_device_to_handle(device), &pipeline_info,
- &prop_count, NULL);
+ result = radv_GetPipelineExecutablePropertiesKHR(radv_device_to_handle(device), &pipeline_info, &prop_count, NULL);
if (result != VK_SUCCESS)
return result;
if (!props)
return VK_ERROR_OUT_OF_HOST_MEMORY;
- result = radv_GetPipelineExecutablePropertiesKHR(radv_device_to_handle(device), &pipeline_info,
- &prop_count, props);
+ result = radv_GetPipelineExecutablePropertiesKHR(radv_device_to_handle(device), &pipeline_info, &prop_count, props);
if (result != VK_SUCCESS)
goto fail;
exec_info.pipeline = radv_pipeline_to_handle(pipeline);
exec_info.executableIndex = exec_idx;
- result = radv_GetPipelineExecutableStatisticsKHR(radv_device_to_handle(device), &exec_info,
- &stat_count, NULL);
+ result = radv_GetPipelineExecutableStatisticsKHR(radv_device_to_handle(device), &exec_info, &stat_count, NULL);
if (result != VK_SUCCESS)
goto fail;
goto fail;
}
- result = radv_GetPipelineExecutableStatisticsKHR(radv_device_to_handle(device), &exec_info,
- &stat_count, stats);
+ result = radv_GetPipelineExecutableStatisticsKHR(radv_device_to_handle(device), &exec_info, &stat_count, stats);
if (result != VK_SUCCESS) {
free(stats);
goto fail;
AC_UD_MAX_UD = AC_UD_CS_MAX_UD,
};
-#define SET_SGPR_FIELD(field, value) \
- (((unsigned)(value) & field##__MASK) << field##__SHIFT)
+#define SET_SGPR_FIELD(field, value) (((unsigned)(value)&field##__MASK) << field##__SHIFT)
-#define TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS__SHIFT 0
-#define TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS__MASK 0x3f
-#define TCS_OFFCHIP_LAYOUT_NUM_PATCHES__SHIFT 6
-#define TCS_OFFCHIP_LAYOUT_NUM_PATCHES__MASK 0xff
+#define TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS__SHIFT 0
+#define TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS__MASK 0x3f
+#define TCS_OFFCHIP_LAYOUT_NUM_PATCHES__SHIFT 6
+#define TCS_OFFCHIP_LAYOUT_NUM_PATCHES__MASK 0xff
#define PS_STATE_NUM_SAMPLES__SHIFT 0
#define PS_STATE_NUM_SAMPLES__MASK 0xf
#define PS_STATE_LINE_RAST_MODE__SHIFT 4
#define PS_STATE_LINE_RAST_MODE__MASK 0x3
#define PS_STATE_PS_ITER_MASK__SHIFT 6
#define PS_STATE_PS_ITER_MASK__MASK 0xffff
-#define PS_STATE_RAST_PRIM__SHIFT 22
-#define PS_STATE_RAST_PRIM__MASK 0x3
+#define PS_STATE_RAST_PRIM__SHIFT 22
+#define PS_STATE_RAST_PRIM__MASK 0x3
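/* Editor's sketch (not driver code): how SET_SGPR_FIELD packs a value into
 * its field using the shift/mask pairs above. With 32 patch control points
 * and 40 patches, the SGPR ends up as (32 & 0x3f) | ((40 & 0xff) << 6). */
#include <stdio.h>

#define SET_SGPR_FIELD(field, value) (((unsigned)(value)&field##__MASK) << field##__SHIFT)

#define TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS__SHIFT 0
#define TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS__MASK 0x3f
#define TCS_OFFCHIP_LAYOUT_NUM_PATCHES__SHIFT 6
#define TCS_OFFCHIP_LAYOUT_NUM_PATCHES__MASK 0xff

int main(void)
{
   unsigned sgpr = SET_SGPR_FIELD(TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS, 32) |
                   SET_SGPR_FIELD(TCS_OFFCHIP_LAYOUT_NUM_PATCHES, 40);
   printf("0x%x\n", sgpr); /* 0xa20 */
   return 0;
}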
struct radv_streamout_info {
uint16_t num_outputs;
* where the +2 is for the terminating '\0' of the IR and disasm strings. */
uint8_t data[0];
};
-static_assert(sizeof(struct radv_shader_binary_legacy) ==
- offsetof(struct radv_shader_binary_legacy, data),
+static_assert(sizeof(struct radv_shader_binary_legacy) == offsetof(struct radv_shader_binary_legacy, data),
"Unexpected padding");
struct radv_shader_binary_rtld {
void radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively);
void radv_optimize_nir_algebraic(nir_shader *shader, bool opt_offsets);
-void radv_postprocess_nir(struct radv_device *device,
- const struct radv_pipeline_layout *pipeline_layout,
+void radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_layout *pipeline_layout,
const struct radv_pipeline_key *pipeline_key, unsigned last_vgt_api_stage,
struct radv_pipeline_stage *stage);
-nir_shader *radv_parse_rt_stage(struct radv_device *device,
- const VkPipelineShaderStageCreateInfo *sinfo,
+nir_shader *radv_parse_rt_stage(struct radv_device *device, const VkPipelineShaderStageCreateInfo *sinfo,
const struct radv_pipeline_key *key);
void radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
struct radv_pipeline_stage;
-nir_shader *radv_shader_spirv_to_nir(struct radv_device *device,
- const struct radv_pipeline_stage *stage,
- const struct radv_pipeline_key *key,
- bool is_internal);
+nir_shader *radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_stage *stage,
+ const struct radv_pipeline_key *key, bool is_internal);
void radv_init_shader_arenas(struct radv_device *device);
void radv_destroy_shader_arenas(struct radv_device *device);
struct radv_shader_args;
-struct radv_shader *radv_shader_create(struct radv_device *device,
- const struct radv_shader_binary *binary);
+struct radv_shader *radv_shader_create(struct radv_device *device, const struct radv_shader_binary *binary);
-struct radv_shader *radv_shader_create_cached(struct radv_device *device,
- struct vk_pipeline_cache *cache,
+struct radv_shader *radv_shader_create_cached(struct radv_device *device, struct vk_pipeline_cache *cache,
const struct radv_shader_binary *binary);
-struct radv_shader *
-radv_shader_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
- struct radv_pipeline_stage *stage, struct nir_shader *const *shaders,
- int shader_count, const struct radv_pipeline_key *key, bool keep_shader_info,
- bool keep_statistic_info, struct radv_shader_binary **binary_out);
+struct radv_shader *radv_shader_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
+ struct radv_pipeline_stage *stage, struct nir_shader *const *shaders,
+ int shader_count, const struct radv_pipeline_key *key, bool keep_shader_info,
+ bool keep_statistic_info, struct radv_shader_binary **binary_out);
VkResult radv_shader_wait_for_upload(struct radv_device *device, uint64_t seq);
-struct radv_shader_dma_submission *
-radv_shader_dma_pop_submission(struct radv_device *device);
+struct radv_shader_dma_submission *radv_shader_dma_pop_submission(struct radv_device *device);
-void radv_shader_dma_push_submission(struct radv_device *device,
- struct radv_shader_dma_submission *submission,
+void radv_shader_dma_push_submission(struct radv_device *device, struct radv_shader_dma_submission *submission,
uint64_t seq);
-struct radv_shader_dma_submission *radv_shader_dma_get_submission(struct radv_device *device,
- struct radeon_winsys_bo *bo,
- uint64_t va, uint64_t size);
+struct radv_shader_dma_submission *
+radv_shader_dma_get_submission(struct radv_device *device, struct radeon_winsys_bo *bo, uint64_t va, uint64_t size);
-bool radv_shader_dma_submit(struct radv_device *device,
- struct radv_shader_dma_submission *submission,
+bool radv_shader_dma_submit(struct radv_device *device, struct radv_shader_dma_submission *submission,
uint64_t *upload_seq_out);
-union radv_shader_arena_block *radv_alloc_shader_memory(struct radv_device *device, uint32_t size,
- void *ptr);
+union radv_shader_arena_block *radv_alloc_shader_memory(struct radv_device *device, uint32_t size, void *ptr);
void radv_free_shader_memory(struct radv_device *device, union radv_shader_arena_block *alloc);
struct radv_shader *radv_create_trap_handler_shader(struct radv_device *device);
struct radv_shader *radv_create_rt_prolog(struct radv_device *device);
-struct radv_shader_part *radv_shader_part_create(struct radv_device *device,
- struct radv_shader_part_binary *binary,
+struct radv_shader_part *radv_shader_part_create(struct radv_device *device, struct radv_shader_part_binary *binary,
unsigned wave_size);
-struct radv_shader_part *radv_create_vs_prolog(struct radv_device *device,
- const struct radv_vs_prolog_key *key);
+struct radv_shader_part *radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_key *key);
-struct radv_shader_part *radv_create_ps_epilog(struct radv_device *device,
- const struct radv_ps_epilog_key *key,
+struct radv_shader_part *radv_create_ps_epilog(struct radv_device *device, const struct radv_ps_epilog_key *key,
struct radv_shader_part_binary **binary_out);
void radv_shader_part_destroy(struct radv_device *device, struct radv_shader_part *shader_part);
uint64_t radv_shader_get_va(const struct radv_shader *shader);
struct radv_shader *radv_find_shader(struct radv_device *device, uint64_t pc);
-unsigned radv_get_max_waves(const struct radv_device *device, struct radv_shader *shader,
- gl_shader_stage stage);
+unsigned radv_get_max_waves(const struct radv_device *device, struct radv_shader *shader, gl_shader_stage stage);
const char *radv_get_shader_name(const struct radv_shader_info *info, gl_shader_stage stage);
-unsigned radv_compute_spi_ps_input(const struct radv_pipeline_key *pipeline_key,
- const struct radv_shader_info *info);
+unsigned radv_compute_spi_ps_input(const struct radv_pipeline_key *pipeline_key, const struct radv_shader_info *info);
bool radv_can_dump_shader(struct radv_device *device, nir_shader *nir, bool meta_shader);
bool radv_can_dump_shader_stats(struct radv_device *device, nir_shader *nir);
-VkResult radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipeline,
- struct radv_shader *shader, gl_shader_stage stage, FILE *output);
+VkResult radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipeline, struct radv_shader *shader,
+ gl_shader_stage stage, FILE *output);
extern const struct vk_pipeline_cache_object_ops radv_shader_ops;
}
static inline unsigned
-calculate_tess_lds_size(enum amd_gfx_level gfx_level, unsigned tcs_num_input_vertices,
- unsigned tcs_num_output_vertices, unsigned tcs_num_inputs,
- unsigned tcs_num_patches, unsigned tcs_num_outputs,
+calculate_tess_lds_size(enum amd_gfx_level gfx_level, unsigned tcs_num_input_vertices, unsigned tcs_num_output_vertices,
+ unsigned tcs_num_inputs, unsigned tcs_num_patches, unsigned tcs_num_outputs,
unsigned tcs_num_patch_outputs)
{
unsigned input_vertex_size = get_tcs_input_vertex_stride(tcs_num_inputs);
}
static inline unsigned
-get_tcs_num_patches(unsigned tcs_num_input_vertices, unsigned tcs_num_output_vertices,
- unsigned tcs_num_inputs, unsigned tcs_num_outputs,
- unsigned tcs_num_patch_outputs, unsigned tess_offchip_block_dw_size,
+get_tcs_num_patches(unsigned tcs_num_input_vertices, unsigned tcs_num_output_vertices, unsigned tcs_num_inputs,
+ unsigned tcs_num_outputs, unsigned tcs_num_patch_outputs, unsigned tess_offchip_block_dw_size,
enum amd_gfx_level gfx_level, enum radeon_family family)
{
uint32_t input_vertex_size = get_tcs_input_vertex_stride(tcs_num_inputs);
void radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_stage,
const struct radv_pipeline_key *pl_key);
-bool radv_consider_culling(const struct radv_physical_device *pdevice, struct nir_shader *nir,
- uint64_t ps_inputs_read, unsigned num_vertices_per_primitive,
- const struct radv_shader_info *info);
+bool radv_consider_culling(const struct radv_physical_device *pdevice, struct nir_shader *nir, uint64_t ps_inputs_read,
+ unsigned num_vertices_per_primitive, const struct radv_shader_info *info);
void radv_get_nir_options(struct radv_physical_device *device);
-nir_shader *radv_build_traversal_shader(struct radv_device *device,
- struct radv_ray_tracing_pipeline *pipeline,
+nir_shader *radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
const struct radv_pipeline_key *key);
};
static void
-allocate_inline_push_consts(const struct radv_shader_info *info,
- struct user_sgpr_info *user_sgpr_info)
+allocate_inline_push_consts(const struct radv_shader_info *info, struct user_sgpr_info *user_sgpr_info)
{
uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs;
/* Disable the default push constants path if all constants can be inlined and if shaders don't
* use dynamic descriptors.
*/
- if (num_push_consts <= MIN2(remaining_sgprs + 1, AC_MAX_INLINE_PUSH_CONSTS) &&
- info->can_inline_all_push_constants && !info->loads_dynamic_offsets) {
+ if (num_push_consts <= MIN2(remaining_sgprs + 1, AC_MAX_INLINE_PUSH_CONSTS) && info->can_inline_all_push_constants &&
+ !info->loads_dynamic_offsets) {
user_sgpr_info->inlined_all_push_consts = true;
remaining_sgprs++;
} else {
}
static void
-add_descriptor_set(struct radv_shader_args *args, enum ac_arg_type type, struct ac_arg *arg,
- uint32_t set)
+add_descriptor_set(struct radv_shader_args *args, enum ac_arg_type type, struct ac_arg *arg, uint32_t set)
{
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, type, arg);
}
static void
-declare_global_input_sgprs(const struct radv_shader_info *info,
- const struct user_sgpr_info *user_sgpr_info,
+declare_global_input_sgprs(const struct radv_shader_info *info, const struct user_sgpr_info *user_sgpr_info,
struct radv_shader_args *args)
{
if (user_sgpr_info) {
add_descriptor_set(args, AC_ARG_CONST_PTR, &args->descriptor_sets[i], i);
}
} else {
- add_ud_arg(args, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0],
- AC_UD_INDIRECT_DESCRIPTOR_SETS);
+ add_ud_arg(args, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0], AC_UD_INDIRECT_DESCRIPTOR_SETS);
}
if (info->loads_push_constants && !user_sgpr_info->inlined_all_push_consts) {
}
for (unsigned i = 0; i < util_bitcount64(user_sgpr_info->inline_push_constant_mask); i++) {
- add_ud_arg(args, 1, AC_ARG_INT, &args->ac.inline_push_consts[i],
- AC_UD_INLINE_PUSH_CONSTANTS);
+ add_ud_arg(args, 1, AC_ARG_INT, &args->ac.inline_push_consts[i], AC_UD_INLINE_PUSH_CONSTANTS);
}
args->ac.inline_push_const_mask = user_sgpr_info->inline_push_constant_mask;
}
if (args->type != RADV_SHADER_TYPE_GS_COPY &&
(stage == MESA_SHADER_VERTEX || previous_stage == MESA_SHADER_VERTEX)) {
if (info->vs.vb_desc_usage_mask) {
- add_ud_arg(args, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers,
- AC_UD_VS_VERTEX_BUFFERS);
+ add_ud_arg(args, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers, AC_UD_VS_VERTEX_BUFFERS);
}
add_ud_arg(args, 1, AC_ARG_INT, &args->ac.base_vertex, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
add_ud_arg(args, 1, AC_ARG_INT, &args->ac.draw_id, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
}
if (info->vs.needs_base_instance) {
- add_ud_arg(args, 1, AC_ARG_INT, &args->ac.start_instance,
- AC_UD_VS_BASE_VERTEX_START_INSTANCE);
+ add_ud_arg(args, 1, AC_ARG_INT, &args->ac.start_instance, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
}
}
}
static void
-declare_vs_input_vgprs(enum amd_gfx_level gfx_level, const struct radv_shader_info *info,
- struct radv_shader_args *args, bool merged_vs_tcs)
+declare_vs_input_vgprs(enum amd_gfx_level gfx_level, const struct radv_shader_info *info, struct radv_shader_args *args,
+ bool merged_vs_tcs)
{
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
if (args->type != RADV_SHADER_TYPE_GS_COPY) {
}
static void
-declare_streamout_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args,
- gl_shader_stage stage)
+declare_streamout_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, gl_shader_stage stage)
{
int i;
}
static void
-declare_ngg_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args,
- bool has_ngg_query, bool has_ngg_provoking_vtx)
+declare_ngg_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, bool has_ngg_query,
+ bool has_ngg_provoking_vtx)
{
if (has_ngg_query)
add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_query_state, AC_UD_NGG_QUERY_STATE);
}
static void
-radv_init_shader_args(const struct radv_device *device, gl_shader_stage stage,
- enum radv_shader_type type, struct radv_shader_args *args)
+radv_init_shader_args(const struct radv_device *device, gl_shader_stage stage, enum radv_shader_type type,
+ struct radv_shader_args *args)
{
memset(args, 0, sizeof(*args));
radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, struct radv_shader_args *args)
{
add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.shader_pc, AC_UD_SCRATCH_RING_OFFSETS);
- add_ud_arg(args, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0],
- AC_UD_INDIRECT_DESCRIPTOR_SETS);
+ add_ud_arg(args, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0], AC_UD_INDIRECT_DESCRIPTOR_SETS);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants);
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ac.rt.sbt_descriptors);
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.traversal_shader);
if (key->dynamic_line_rast_mode)
return true;
- if (info->ps.reads_sample_mask_in &&
- (info->ps.uses_sample_shading || key->ps.sample_shading_enable))
+ if (info->ps.reads_sample_mask_in && (info->ps.uses_sample_shading || key->ps.sample_shading_enable))
return true;
/* For computing barycentrics when the primitive topology is unknown at compile time (GPL). */
static void
declare_shader_args(const struct radv_device *device, const struct radv_pipeline_key *key,
- const struct radv_shader_info *info, gl_shader_stage stage,
- gl_shader_stage previous_stage, enum radv_shader_type type,
- struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info)
+ const struct radv_shader_info *info, gl_shader_stage stage, gl_shader_stage previous_stage,
+ enum radv_shader_type type, struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info)
{
const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
bool needs_view_index = info->uses_view_index;
bool has_ngg_query = info->has_ngg_prim_query || info->has_ngg_xfb_query ||
(stage == MESA_SHADER_GEOMETRY && info->gs.has_ngg_pipeline_stat_query);
- bool has_ngg_provoking_vtx = (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_GEOMETRY) &&
- key->dynamic_provoking_vtx_mode;
+ bool has_ngg_provoking_vtx =
+ (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_GEOMETRY) && key->dynamic_provoking_vtx_mode;
if (gfx_level >= GFX10 && info->is_ngg && stage != MESA_SHADER_GEOMETRY) {
/* Handle all NGG shaders as GS to simplify the code here. */
add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->ac.ring_offsets, AC_UD_SCRATCH_RING_OFFSETS);
if (stage == MESA_SHADER_TASK) {
- add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->task_ring_offsets,
- AC_UD_CS_TASK_RING_OFFSETS);
+ add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->task_ring_offsets, AC_UD_CS_TASK_RING_OFFSETS);
}
/* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
}
if (info->cs.is_rt_shader) {
- add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->ac.rt.sbt_descriptors,
- AC_UD_CS_SBT_DESCRIPTORS);
- add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.traversal_shader,
- AC_UD_CS_TRAVERSAL_SHADER_ADDR);
- add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.launch_size_addr,
- AC_UD_CS_RAY_LAUNCH_SIZE_ADDR);
+ add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->ac.rt.sbt_descriptors, AC_UD_CS_SBT_DESCRIPTORS);
+ add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.traversal_shader, AC_UD_CS_TRAVERSAL_SHADER_ADDR);
+ add_ud_arg(args, 2, AC_ARG_CONST_PTR, &args->ac.rt.launch_size_addr, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR);
add_ud_arg(args, 1, AC_ARG_INT, &args->ac.rt.dynamic_callable_stack_base,
AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE);
}
void
radv_declare_shader_args(const struct radv_device *device, const struct radv_pipeline_key *key,
- const struct radv_shader_info *info, gl_shader_stage stage,
- gl_shader_stage previous_stage, enum radv_shader_type type,
- struct radv_shader_args *args)
+ const struct radv_shader_info *info, gl_shader_stage stage, gl_shader_stage previous_stage,
+ enum radv_shader_type type, struct radv_shader_args *args)
{
declare_shader_args(device, key, info, stage, previous_stage, type, args, NULL);
num_user_sgprs++;
const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
- uint32_t available_sgprs =
- gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16;
+ uint32_t available_sgprs = gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16;
uint32_t remaining_sgprs = available_sgprs - num_user_sgprs;
struct user_sgpr_info user_sgpr_info = {
gl_shader_stage previous_stage, enum radv_shader_type type,
struct radv_shader_args *args);
-void radv_declare_ps_epilog_args(const struct radv_device *device,
- const struct radv_ps_epilog_key *key,
+void radv_declare_ps_epilog_args(const struct radv_device *device, const struct radv_ps_epilog_key *key,
struct radv_shader_args *args);
void radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, struct radv_shader_args *args);
}
static void
-gather_intrinsic_load_input_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
- struct radv_shader_info *info)
+gather_intrinsic_load_input_info(const nir_shader *nir, const nir_intrinsic_instr *instr, struct radv_shader_info *info)
{
switch (nir->info.stage) {
case MESA_SHADER_VERTEX: {
}
static void
-gather_push_constant_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
- struct radv_shader_info *info)
+gather_push_constant_info(const nir_shader *nir, const nir_intrinsic_instr *instr, struct radv_shader_info *info)
{
info->loads_push_constants = true;
}
static void
-gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
- struct radv_shader_info *info, bool consider_force_vrs)
+gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr, struct radv_shader_info *info,
+ bool consider_force_vrs)
{
switch (instr->intrinsic) {
case nir_intrinsic_load_barycentric_sample:
case nir_intrinsic_image_deref_atomic_swap:
case nir_intrinsic_image_deref_size:
case nir_intrinsic_image_deref_samples: {
- nir_variable *var =
- nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
+ nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
mark_sampler_desc(var, info);
break;
}
}
static void
-gather_info_block(const nir_shader *nir, const nir_block *block, struct radv_shader_info *info,
- bool consider_force_vrs)
+gather_info_block(const nir_shader *nir, const nir_block *block, struct radv_shader_info *info, bool consider_force_vrs)
{
nir_foreach_instr (instr, block) {
switch (instr->type) {
}
static void
-assign_outinfo_param(struct radv_vs_output_info *outinfo, gl_varying_slot idx,
- unsigned *total_param_exports, unsigned extra_offset)
+assign_outinfo_param(struct radv_vs_output_info *outinfo, gl_varying_slot idx, unsigned *total_param_exports,
+ unsigned extra_offset)
{
if (outinfo->vs_output_param_offset[idx] == AC_EXP_PARAM_UNDEFINED)
outinfo->vs_output_param_offset[idx] = extra_offset + (*total_param_exports)++;
}
static void
-assign_outinfo_params(struct radv_vs_output_info *outinfo, uint64_t mask,
- unsigned *total_param_exports, unsigned extra_offset)
+assign_outinfo_params(struct radv_vs_output_info *outinfo, uint64_t mask, unsigned *total_param_exports,
+ unsigned extra_offset)
{
- u_foreach_bit64(idx, mask) {
- if (idx >= VARYING_SLOT_VAR0 || idx == VARYING_SLOT_LAYER ||
- idx == VARYING_SLOT_PRIMITIVE_ID || idx == VARYING_SLOT_VIEWPORT)
+ u_foreach_bit64 (idx, mask) {
+ if (idx >= VARYING_SLOT_VAR0 || idx == VARYING_SLOT_LAYER || idx == VARYING_SLOT_PRIMITIVE_ID ||
+ idx == VARYING_SLOT_VIEWPORT)
assign_outinfo_param(outinfo, idx, total_param_exports, extra_offset);
}
}
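/* Editor's sketch (not driver code): the u_foreach_bit64 walk above iterates
 * the set bits of the output mask and hands out consecutive param-export
 * slots. __builtin_ctzll here stands in for the bit-scan helper behind that
 * macro; the slot numbers are placeholders, not Mesa's VARYING_SLOT_* values. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
   uint64_t mask = (1ull << 3) | (1ull << 7) | (1ull << 40);
   unsigned total_param_exports = 0;

   for (uint64_t m = mask; m; m &= m - 1) { /* clear the lowest set bit each pass */
      unsigned idx = (unsigned)__builtin_ctzll(m);
      printf("slot %u -> param %u\n", idx, total_param_exports++);
   }
   return 0;
}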
static uint8_t
-radv_get_wave_size(struct radv_device *device, gl_shader_stage stage,
- const struct radv_shader_info *info)
+radv_get_wave_size(struct radv_device *device, gl_shader_stage stage, const struct radv_shader_info *info)
{
if (stage == MESA_SHADER_GEOMETRY && !info->is_ngg)
return 64;
}
static uint8_t
-radv_get_ballot_bit_size(struct radv_device *device, gl_shader_stage stage,
- const struct radv_shader_info *info)
+radv_get_ballot_bit_size(struct radv_device *device, gl_shader_stage stage, const struct radv_shader_info *info)
{
if (stage == MESA_SHADER_COMPUTE && info->cs.subgroup_size)
return info->cs.subgroup_size;
else
info->vs.vb_desc_usage_mask |= BITFIELD_BIT(key->vs.vertex_attribute_bindings[location]);
- info->vs.input_slot_usage_mask |=
- BITFIELD_RANGE(location, glsl_count_attribute_slots(type, false));
+ info->vs.input_slot_usage_mask |= BITFIELD_RANGE(location, glsl_count_attribute_slots(type, false));
} else if (glsl_type_is_matrix(type) || glsl_type_is_array(type)) {
const struct glsl_type *elem = glsl_get_array_element(type);
unsigned stride = glsl_count_attribute_slots(elem, false);
}
static void
-gather_shader_info_vs(struct radv_device *device, const nir_shader *nir,
- const struct radv_pipeline_key *pipeline_key, struct radv_shader_info *info)
+gather_shader_info_vs(struct radv_device *device, const nir_shader *nir, const struct radv_pipeline_key *pipeline_key,
+ struct radv_shader_info *info)
{
if (pipeline_key->vs.has_prolog && nir->info.inputs_read) {
info->vs.has_prolog = true;
info->vs.needs_base_instance |= info->vs.has_prolog;
info->vs.needs_draw_id |= info->vs.has_prolog;
- nir_foreach_shader_in_variable(var, nir)
- gather_info_input_decl_vs(nir, var->data.location - VERT_ATTRIB_GENERIC0, var->type,
- pipeline_key, info);
+ nir_foreach_shader_in_variable (var, nir)
+ gather_info_input_decl_vs(nir, var->data.location - VERT_ATTRIB_GENERIC0, var->type, pipeline_key, info);
if (info->vs.dynamic_inputs)
info->vs.vb_desc_usage_mask = BITFIELD_MASK(util_last_bit(info->vs.vb_desc_usage_mask));
}
static void
-gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir,
- const struct radv_pipeline_key *pipeline_key, struct radv_shader_info *info)
+gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir, const struct radv_pipeline_key *pipeline_key,
+ struct radv_shader_info *info)
{
info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
info->num_tess_patches =
get_tcs_num_patches(pipeline_key->tcs.tess_input_vertices, nir->info.tess.tcs_vertices_out,
info->tcs.num_linked_inputs, info->tcs.num_linked_outputs,
- info->tcs.num_linked_patch_outputs,
- device->physical_device->hs.tess_offchip_block_dw_size,
- device->physical_device->rad_info.gfx_level,
- device->physical_device->rad_info.family);
+ info->tcs.num_linked_patch_outputs, device->physical_device->hs.tess_offchip_block_dw_size,
+ device->physical_device->rad_info.gfx_level, device->physical_device->rad_info.family);
/* LDS size used by VS+TCS for storing TCS inputs and outputs. */
info->tcs.num_lds_blocks =
- calculate_tess_lds_size(device->physical_device->rad_info.gfx_level,
- pipeline_key->tcs.tess_input_vertices,
- nir->info.tess.tcs_vertices_out, info->tcs.num_linked_inputs,
- info->num_tess_patches, info->tcs.num_linked_outputs,
- info->tcs.num_linked_patch_outputs);
+ calculate_tess_lds_size(device->physical_device->rad_info.gfx_level, pipeline_key->tcs.tess_input_vertices,
+ nir->info.tess.tcs_vertices_out, info->tcs.num_linked_inputs, info->num_tess_patches,
+ info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs);
}
}
info->gs.input_prim = nir->info.gs.input_primitive;
info->gs.output_prim = nir->info.gs.output_primitive;
info->gs.invocations = nir->info.gs.invocations;
- info->gs.max_stream =
- nir->info.gs.active_stream_mask ? util_last_bit(nir->info.gs.active_stream_mask) - 1 : 0;
+ info->gs.max_stream = nir->info.gs.active_stream_mask ? util_last_bit(nir->info.gs.active_stream_mask) - 1 : 0;
- nir_foreach_shader_out_variable(var, nir) {
+ nir_foreach_shader_out_variable (var, nir) {
unsigned num_components = glsl_get_component_slots(var->type);
unsigned stream = var->data.stream;
ngg_info->prim_amp_factor = nir->info.mesh.max_primitives_out;
ngg_info->vgt_esgs_ring_itemsize = 1;
- unsigned min_ngg_workgroup_size =
- ac_compute_ngg_workgroup_size(ngg_info->hw_max_esverts, ngg_info->max_gsprims,
- ngg_info->max_out_verts, ngg_info->prim_amp_factor);
+ unsigned min_ngg_workgroup_size = ac_compute_ngg_workgroup_size(ngg_info->hw_max_esverts, ngg_info->max_gsprims,
+ ngg_info->max_out_verts, ngg_info->prim_amp_factor);
- unsigned api_workgroup_size =
- ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX);
+ unsigned api_workgroup_size = ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX);
info->workgroup_size = MAX2(min_ngg_workgroup_size, api_workgroup_size);
}
}
info->ps.can_discard = nir->info.fs.uses_discard;
- info->ps.early_fragment_test = nir->info.fs.early_fragment_tests ||
- (nir->info.fs.early_and_late_fragment_tests &&
- nir->info.fs.depth_layout == FRAG_DEPTH_LAYOUT_NONE &&
- nir->info.fs.stencil_front_layout == FRAG_STENCIL_LAYOUT_NONE &&
- nir->info.fs.stencil_back_layout == FRAG_STENCIL_LAYOUT_NONE);
+ info->ps.early_fragment_test =
+ nir->info.fs.early_fragment_tests ||
+ (nir->info.fs.early_and_late_fragment_tests && nir->info.fs.depth_layout == FRAG_DEPTH_LAYOUT_NONE &&
+ nir->info.fs.stencil_front_layout == FRAG_STENCIL_LAYOUT_NONE &&
+ nir->info.fs.stencil_back_layout == FRAG_STENCIL_LAYOUT_NONE);
info->ps.post_depth_coverage = nir->info.fs.post_depth_coverage;
info->ps.depth_layout = nir->info.fs.depth_layout;
info->ps.uses_sample_shading = nir->info.fs.uses_sample_shading;
info->ps.reads_frag_shading_rate = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_SHADING_RATE);
info->ps.reads_front_face = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
info->ps.reads_barycentric_model = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL);
- info->ps.reads_fully_covered =
- BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FULLY_COVERED);
+ info->ps.reads_fully_covered = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FULLY_COVERED);
- bool uses_persp_or_linear_interp = info->ps.reads_persp_center ||
- info->ps.reads_persp_centroid ||
- info->ps.reads_persp_sample ||
- info->ps.reads_linear_center ||
- info->ps.reads_linear_centroid ||
- info->ps.reads_linear_sample;
+ bool uses_persp_or_linear_interp = info->ps.reads_persp_center || info->ps.reads_persp_centroid ||
+ info->ps.reads_persp_sample || info->ps.reads_linear_center ||
+ info->ps.reads_linear_centroid || info->ps.reads_linear_sample;
info->ps.allow_flat_shading =
- !(uses_persp_or_linear_interp || info->ps.needs_sample_positions ||
- info->ps.reads_frag_shading_rate || info->ps.writes_memory ||
- nir->info.fs.needs_quad_helper_invocations ||
+ !(uses_persp_or_linear_interp || info->ps.needs_sample_positions || info->ps.reads_frag_shading_rate ||
+ info->ps.writes_memory || nir->info.fs.needs_quad_helper_invocations ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD) ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
info->ps.has_epilog = pipeline_key->ps.has_epilog && info->ps.colors_written;
- info->ps.writes_mrt0_alpha =
- (pipeline_key->ps.alpha_to_coverage_via_mrtz && (info->ps.color0_written & 0x8)) &&
- (info->ps.writes_z || info->ps.writes_stencil || info->ps.writes_sample_mask);
+ info->ps.writes_mrt0_alpha = (pipeline_key->ps.alpha_to_coverage_via_mrtz && (info->ps.color0_written & 0x8)) &&
+ (info->ps.writes_z || info->ps.writes_stencil || info->ps.writes_sample_mask);
info->ps.mrt0_is_dual_src = pipeline_key->ps.epilog.mrt0_is_dual_src;
info->ps.spi_shader_col_format = pipeline_key->ps.epilog.spi_shader_col_format;
- nir_foreach_shader_in_variable(var, nir) {
- const struct glsl_type *type =
- var->data.per_vertex ? glsl_get_array_element(var->type) : var->type;
+ nir_foreach_shader_in_variable (var, nir) {
+ const struct glsl_type *type = var->data.per_vertex ? glsl_get_array_element(var->type) : var->type;
unsigned attrib_count = glsl_count_attribute_slots(type, false);
int idx = var->data.location;
}
static void
-gather_shader_info_cs(struct radv_device *device, const nir_shader *nir,
- const struct radv_pipeline_key *pipeline_key, struct radv_shader_info *info)
+gather_shader_info_cs(struct radv_device *device, const nir_shader *nir, const struct radv_pipeline_key *pipeline_key,
+ struct radv_shader_info *info)
{
info->cs.uses_ray_launch_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_RAY_LAUNCH_SIZE_ADDR_AMD);
if (!subgroup_size)
subgroup_size = default_wave_size;
- unsigned local_size =
- nir->info.workgroup_size[0] * nir->info.workgroup_size[1] * nir->info.workgroup_size[2];
+ unsigned local_size = nir->info.workgroup_size[0] * nir->info.workgroup_size[1] * nir->info.workgroup_size[2];
/* Games don't always request full subgroups when they should, which can cause bugs if cswave32
* is enabled.
info->cs.subgroup_size = subgroup_size;
if (device->physical_device->rad_info.has_cs_regalloc_hang_bug) {
- info->cs.regalloc_hang_bug =
- info->cs.block_size[0] * info->cs.block_size[1] * info->cs.block_size[2] > 256;
+ info->cs.regalloc_hang_bug = info->cs.block_size[0] * info->cs.block_size[1] * info->cs.block_size[2] > 256;
}
}
/* Task->Mesh dispatch is linear when Y = Z = 1.
* GFX11 CP can optimize this case with a field in its draw packets.
*/
- info->cs.linear_taskmesh_dispatch = nir->info.mesh.ts_mesh_dispatch_dimensions[1] == 1 &&
- nir->info.mesh.ts_mesh_dispatch_dimensions[2] == 1;
+ info->cs.linear_taskmesh_dispatch =
+ nir->info.mesh.ts_mesh_dispatch_dimensions[1] == 1 && nir->info.mesh.ts_mesh_dispatch_dimensions[2] == 1;
}
static uint32_t
assert(info->stage != MESA_SHADER_MESH);
return R_00B130_SPI_SHADER_USER_DATA_VS_0;
case MESA_SHADER_TESS_CTRL:
- return gfx_level == GFX9 ? R_00B430_SPI_SHADER_USER_DATA_LS_0
- : R_00B430_SPI_SHADER_USER_DATA_HS_0;
+ return gfx_level == GFX9 ? R_00B430_SPI_SHADER_USER_DATA_LS_0 : R_00B430_SPI_SHADER_USER_DATA_HS_0;
case MESA_SHADER_GEOMETRY:
- return gfx_level == GFX9 ? R_00B330_SPI_SHADER_USER_DATA_ES_0
- : R_00B230_SPI_SHADER_USER_DATA_GS_0;
+ return gfx_level == GFX9 ? R_00B330_SPI_SHADER_USER_DATA_ES_0 : R_00B230_SPI_SHADER_USER_DATA_GS_0;
case MESA_SHADER_FRAGMENT:
return R_00B030_SPI_SHADER_USER_DATA_PS_0;
case MESA_SHADER_COMPUTE:
}
void
-radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
- gl_shader_stage next_stage,
- const struct radv_pipeline_layout *layout,
- const struct radv_pipeline_key *pipeline_key,
- const enum radv_pipeline_type pipeline_type,
- bool consider_force_vrs,
+radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir, gl_shader_stage next_stage,
+ const struct radv_pipeline_layout *layout, const struct radv_pipeline_key *pipeline_key,
+ const enum radv_pipeline_type pipeline_type, bool consider_force_vrs,
struct radv_shader_info *info)
{
info->stage = nir->info.stage;
uint64_t special_mask = BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_COUNT) |
BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES) |
BITFIELD64_BIT(VARYING_SLOT_CULL_PRIMITIVE);
- uint64_t per_prim_mask =
- nir->info.outputs_written & nir->info.per_primitive_outputs & ~special_mask;
- uint64_t per_vtx_mask =
- nir->info.outputs_written & ~nir->info.per_primitive_outputs & ~special_mask;
+ uint64_t per_prim_mask = nir->info.outputs_written & nir->info.per_primitive_outputs & ~special_mask;
+ uint64_t per_vtx_mask = nir->info.outputs_written & ~nir->info.per_primitive_outputs & ~special_mask;
   /* Mesh multiview is only lowered in ac_nir_lower_ngg, so we have to fake it here. */
if (nir->info.stage == MESA_SHADER_MESH && pipeline_key->has_multiview_view_index) {
outinfo->pos_exports = util_bitcount(pos_written);
- memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
- sizeof(outinfo->vs_output_param_offset));
+ memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED, sizeof(outinfo->vs_output_param_offset));
unsigned total_param_exports = 0;
switch (nir->info.stage) {
case MESA_SHADER_COMPUTE:
case MESA_SHADER_TASK:
- info->workgroup_size =
- ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX);
+ info->workgroup_size = ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX);
/* Allow the compiler to assume that the shader always has full subgroups,
* meaning that the initial EXEC mask is -1 in all waves (all lanes enabled).
* This assumption is incorrect for ray tracing and internal (meta) shaders
* because they can use unaligned dispatch.
*/
- info->cs.uses_full_subgroups =
- pipeline_type != RADV_PIPELINE_RAY_TRACING &&
- !nir->info.internal &&
- (info->workgroup_size % info->wave_size) == 0;
+ info->cs.uses_full_subgroups = pipeline_type != RADV_PIPELINE_RAY_TRACING && !nir->info.internal &&
+ (info->workgroup_size % info->wave_size) == 0;
break;
case MESA_SHADER_MESH:
/* Already computed in gather_shader_info_mesh(). */
struct radv_legacy_gs_info *out = &gs_stage->info.gs_ring_info;
const unsigned gs_num_invocations = MAX2(gs_info->gs.invocations, 1);
- const bool uses_adjacency = gs_info->gs.input_prim == MESA_PRIM_LINES_ADJACENCY ||
- gs_info->gs.input_prim == MESA_PRIM_TRIANGLES_ADJACENCY;
+ const bool uses_adjacency =
+ gs_info->gs.input_prim == MESA_PRIM_LINES_ADJACENCY || gs_info->gs.input_prim == MESA_PRIM_TRIANGLES_ADJACENCY;
/* All these are in dwords: */
/* We can't allow using the whole LDS, because GS waves compete with
* Make sure we don't go over the maximum value.
*/
if (gs_info->gs.vertices_out > 0) {
- max_gs_prims =
- MIN2(max_gs_prims, max_out_prims / (gs_info->gs.vertices_out * gs_num_invocations));
+ max_gs_prims = MIN2(max_gs_prims, max_out_prims / (gs_info->gs.vertices_out * gs_num_invocations));
}
assert(max_gs_prims > 0);
out->vgt_esgs_ring_itemsize = esgs_itemsize;
assert(max_prims_per_subgroup <= max_out_prims);
- unsigned workgroup_size = ac_compute_esgs_workgroup_size(gfx_level, es_info->wave_size,
- es_verts_per_subgroup, gs_inst_prims_in_subgroup);
+ unsigned workgroup_size =
+ ac_compute_esgs_workgroup_size(gfx_level, es_info->wave_size, es_verts_per_subgroup, gs_inst_prims_in_subgroup);
es_info->workgroup_size = workgroup_size;
gs_info->workgroup_size = workgroup_size;
}
static void
-clamp_gsprims_to_esverts(unsigned *max_gsprims, unsigned max_esverts, unsigned min_verts_per_prim,
- bool use_adjacency)
+clamp_gsprims_to_esverts(unsigned *max_gsprims, unsigned max_esverts, unsigned min_verts_per_prim, bool use_adjacency)
{
unsigned max_reuse = max_esverts - min_verts_per_prim;
if (use_adjacency)
}
static unsigned
-radv_get_num_input_vertices(const struct radv_pipeline_stage *es_stage,
- const struct radv_pipeline_stage *gs_stage)
+radv_get_num_input_vertices(const struct radv_pipeline_stage *es_stage, const struct radv_pipeline_stage *gs_stage)
{
if (gs_stage) {
return gs_stage->nir->info.gs.vertices_in;
}
static unsigned
-radv_get_pre_rast_input_topology(const struct radv_pipeline_stage *es_stage,
- const struct radv_pipeline_stage *gs_stage)
+radv_get_pre_rast_input_topology(const struct radv_pipeline_stage *es_stage, const struct radv_pipeline_stage *gs_stage)
{
if (gs_stage) {
return gs_stage->nir->info.gs.input_primitive;
const unsigned gs_num_invocations = gs_stage ? MAX2(gs_info->gs.invocations, 1) : 1;
const unsigned input_prim = radv_get_pre_rast_input_topology(es_stage, gs_stage);
- const bool uses_adjacency = input_prim == MESA_PRIM_LINES_ADJACENCY ||
- input_prim == MESA_PRIM_TRIANGLES_ADJACENCY;
+ const bool uses_adjacency = input_prim == MESA_PRIM_LINES_ADJACENCY || input_prim == MESA_PRIM_TRIANGLES_ADJACENCY;
/* All these are in dwords: */
/* We can't allow using the whole LDS, because GS waves compete with
/* All these are per subgroup: */
const unsigned min_esverts = gfx_level >= GFX11 ? 3 : /* gfx11 requires at least 1 primitive per TG */
- gfx_level >= GFX10_3 ? 29 : 24;
+ gfx_level >= GFX10_3 ? 29
+ : 24;
bool max_vert_out_per_gs_instance = false;
unsigned max_esverts_base = 128;
unsigned max_gsprims_base = 128; /* default prim group size clamp */
max_esverts = align(max_esverts, wavesize);
max_esverts = MIN2(max_esverts, max_esverts_base);
if (esvert_lds_size)
- max_esverts =
- MIN2(max_esverts, (max_lds_size - max_gsprims * gsprim_lds_size) / esvert_lds_size);
+ max_esverts = MIN2(max_esverts, (max_lds_size - max_gsprims * gsprim_lds_size) / esvert_lds_size);
max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
/* Hardware restriction: minimum value of max_esverts */
* for triangles.
*/
unsigned usable_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
- max_gsprims = MIN2(max_gsprims,
- (max_lds_size - usable_esverts * esvert_lds_size) / gsprim_lds_size);
+ max_gsprims = MIN2(max_gsprims, (max_lds_size - usable_esverts * esvert_lds_size) / gsprim_lds_size);
}
clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, uses_adjacency);
assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
}
unsigned max_out_vertices = max_vert_out_per_gs_instance ? gs_info->gs.vertices_out
- : gs_stage
- ? max_gsprims * gs_num_invocations * gs_info->gs.vertices_out
- : max_esverts;
+ : gs_stage ? max_gsprims * gs_num_invocations * gs_info->gs.vertices_out
+ : max_esverts;
assert(max_out_vertices <= 256);
unsigned prim_amp_factor = 1;
assert(out->hw_max_esverts >= min_esverts); /* HW limitation */
unsigned workgroup_size =
- ac_compute_ngg_workgroup_size(
- max_esverts, max_gsprims * gs_num_invocations, max_out_vertices, prim_amp_factor);
+ ac_compute_ngg_workgroup_size(max_esverts, max_gsprims * gs_num_invocations, max_out_vertices, prim_amp_factor);
if (gs_stage) {
gs_info->workgroup_size = workgroup_size;
}
static void
gfx10_get_ngg_query_info(const struct radv_device *device, struct radv_pipeline_stage *es_stage,
- struct radv_pipeline_stage *gs_stage,
- const struct radv_pipeline_key *pipeline_key)
+ struct radv_pipeline_stage *gs_stage, const struct radv_pipeline_key *pipeline_key)
{
struct radv_shader_info *info = gs_stage ? &gs_stage->info : &es_stage->info;
- info->gs.has_ngg_pipeline_stat_query =
- device->physical_device->emulate_ngg_gs_query_pipeline_stat && !!gs_stage;
+ info->gs.has_ngg_pipeline_stat_query = device->physical_device->emulate_ngg_gs_query_pipeline_stat && !!gs_stage;
info->has_ngg_xfb_query = gs_stage ? !!gs_stage->nir->xfb_info : !!es_stage->nir->xfb_info;
info->has_ngg_prim_query = pipeline_key->primitives_generated_query || info->has_ngg_xfb_query;
}
static void
radv_determine_ngg_settings(struct radv_device *device, struct radv_pipeline_stage *es_stage,
- struct radv_pipeline_stage *fs_stage,
- const struct radv_pipeline_key *pipeline_key)
+ struct radv_pipeline_stage *fs_stage, const struct radv_pipeline_key *pipeline_key)
{
assert(es_stage->stage == MESA_SHADER_VERTEX || es_stage->stage == MESA_SHADER_TESS_EVAL);
assert(!fs_stage || fs_stage->stage == MESA_SHADER_FRAGMENT);
if (es_stage->stage == MESA_SHADER_VERTEX) {
num_vertices_per_prim = radv_get_num_vertices_per_prim(pipeline_key);
} else if (es_stage->stage == MESA_SHADER_TESS_EVAL) {
- num_vertices_per_prim = es_stage->nir->info.tess.point_mode ? 1 :
- es_stage->nir->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES ? 2 : 3;
+ num_vertices_per_prim = es_stage->nir->info.tess.point_mode ? 1
+ : es_stage->nir->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES ? 2
+ : 3;
}
/* TODO: Enable culling for LLVM. */
- es_stage->info.has_ngg_culling =
- radv_consider_culling(device->physical_device, es_stage->nir, ps_inputs_read,
- num_vertices_per_prim, &es_stage->info) &&
- !radv_use_llvm_for_stage(device, es_stage->stage);
+ es_stage->info.has_ngg_culling = radv_consider_culling(device->physical_device, es_stage->nir, ps_inputs_read,
+ num_vertices_per_prim, &es_stage->info) &&
+ !radv_use_llvm_for_stage(device, es_stage->stage);
nir_function_impl *impl = nir_shader_get_entrypoint(es_stage->nir);
es_stage->info.has_ngg_early_prim_export = exec_list_is_singular(&impl->body);
   /* NGG passthrough mode should be disabled when culling is enabled and when the vertex shader
* exports the primitive ID.
*/
- es_stage->info.is_ngg_passthrough = !es_stage->info.has_ngg_culling &&
- !(es_stage->stage == MESA_SHADER_VERTEX && es_stage->info.outinfo.export_prim_id);
+ es_stage->info.is_ngg_passthrough = !es_stage->info.has_ngg_culling && !(es_stage->stage == MESA_SHADER_VERTEX &&
+ es_stage->info.outinfo.export_prim_id);
}
static void
-radv_link_shaders_info(struct radv_device *device,
- struct radv_pipeline_stage *producer, struct radv_pipeline_stage *consumer,
- const struct radv_pipeline_key *pipeline_key)
+radv_link_shaders_info(struct radv_device *device, struct radv_pipeline_stage *producer,
+ struct radv_pipeline_stage *consumer, const struct radv_pipeline_key *pipeline_key)
{
/* Export primitive ID and clip/cull distances if read by the FS, or export unconditionally when
* the next stage is unknown (with graphics pipeline library).
const bool ps_prim_id_in = !consumer || consumer->info.ps.prim_id_input;
const bool ps_clip_dists_in = !consumer || !!consumer->info.ps.num_input_clips_culls;
- if (ps_prim_id_in &&
- (producer->stage == MESA_SHADER_VERTEX || producer->stage == MESA_SHADER_TESS_EVAL)) {
+ if (ps_prim_id_in && (producer->stage == MESA_SHADER_VERTEX || producer->stage == MESA_SHADER_TESS_EVAL)) {
/* Mark the primitive ID as output when it's implicitly exported by VS or TES. */
if (outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] == AC_EXP_PARAM_UNDEFINED)
outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = outinfo->param_exports++;
/* Compute the ESGS item size for VS or TES as ES. */
producer->info.esgs_itemsize = num_outputs_written * 16;
- /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
- * conflicts, i.e. each vertex will start on a different bank.
- */
+ /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
+ * conflicts, i.e. each vertex will start on a different bank.
+ */
if (device->physical_device->rad_info.gfx_level >= GFX9 && producer->info.esgs_itemsize)
producer->info.esgs_itemsize += 4;
}
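   /* Worked example (illustrative): with 8 outputs written, esgs_itemsize is
    * 8 * 16 = 128 bytes; that power-of-two stride would land every vertex on
    * the same bank group, while the extra dword (132 bytes) staggers
    * consecutive vertices across banks. */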
/* Compute NGG info (GFX10+) or GS info. */
if (producer->info.is_ngg) {
- struct radv_pipeline_stage *gs_stage =
- consumer && consumer->stage == MESA_SHADER_GEOMETRY ? consumer : NULL;
+ struct radv_pipeline_stage *gs_stage = consumer && consumer->stage == MESA_SHADER_GEOMETRY ? consumer : NULL;
gfx10_get_ngg_info(device, producer, gs_stage);
gfx10_get_ngg_query_info(device, producer, gs_stage, pipeline_key);
}
}
- if (producer->stage == MESA_SHADER_VERTEX &&
- consumer && consumer->stage == MESA_SHADER_TESS_CTRL) {
+ if (producer->stage == MESA_SHADER_VERTEX && consumer && consumer->stage == MESA_SHADER_TESS_CTRL) {
struct radv_pipeline_stage *vs_stage = producer;
struct radv_pipeline_stage *tcs_stage = consumer;
vs_stage->info.workgroup_size = 256;
tcs_stage->info.workgroup_size = 256;
} else {
- vs_stage->info.workgroup_size =
- ac_compute_lshs_workgroup_size(device->physical_device->rad_info.gfx_level,
- MESA_SHADER_VERTEX, tcs_stage->info.num_tess_patches,
- pipeline_key->tcs.tess_input_vertices,
- tcs_stage->info.tcs.tcs_vertices_out);
-
- tcs_stage->info.workgroup_size =
- ac_compute_lshs_workgroup_size(device->physical_device->rad_info.gfx_level,
- MESA_SHADER_TESS_CTRL, tcs_stage->info.num_tess_patches,
- pipeline_key->tcs.tess_input_vertices,
- tcs_stage->info.tcs.tcs_vertices_out);
+ vs_stage->info.workgroup_size = ac_compute_lshs_workgroup_size(
+ device->physical_device->rad_info.gfx_level, MESA_SHADER_VERTEX, tcs_stage->info.num_tess_patches,
+ pipeline_key->tcs.tess_input_vertices, tcs_stage->info.tcs.tcs_vertices_out);
+
+ tcs_stage->info.workgroup_size = ac_compute_lshs_workgroup_size(
+ device->physical_device->rad_info.gfx_level, MESA_SHADER_TESS_CTRL, tcs_stage->info.num_tess_patches,
+ pipeline_key->tcs.tess_input_vertices, tcs_stage->info.tcs.tcs_vertices_out);
if (!radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) {
      /* When the number of TCS input and output vertices is the same (typically 3):
vs_stage->info.vs.tcs_in_out_eq =
device->physical_device->rad_info.gfx_level >= GFX9 &&
pipeline_key->tcs.tess_input_vertices == tcs_stage->info.tcs.tcs_vertices_out &&
- vs_stage->nir->info.float_controls_execution_mode ==
- tcs_stage->nir->info.float_controls_execution_mode;
+ vs_stage->nir->info.float_controls_execution_mode == tcs_stage->nir->info.float_controls_execution_mode;
if (vs_stage->info.vs.tcs_in_out_eq)
vs_stage->info.vs.tcs_temp_only_input_mask =
- tcs_stage->nir->info.inputs_read &
- vs_stage->nir->info.outputs_written &
+ tcs_stage->nir->info.inputs_read & vs_stage->nir->info.outputs_written &
~tcs_stage->nir->info.tess.tcs_cross_invocation_inputs_read &
- ~tcs_stage->nir->info.inputs_read_indirectly &
- ~vs_stage->nir->info.outputs_accessed_indirectly;
+ ~tcs_stage->nir->info.inputs_read_indirectly & ~vs_stage->nir->info.outputs_accessed_indirectly;
}
}
}
}
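/* The mask algebra used for tcs_temp_only_input_mask above, isolated as a
 * sketch (uint64_t from <stdint.h>): an output qualifies when the VS writes
 * it, the TCS reads it back only from the same invocation, and neither side
 * accesses it indirectly. */
static uint64_t
sketch_tcs_temp_only_mask(uint64_t vs_outputs_written, uint64_t tcs_inputs_read,
                          uint64_t tcs_cross_invocation_reads, uint64_t tcs_indirect_reads,
                          uint64_t vs_indirect_writes)
{
   return tcs_inputs_read & vs_outputs_written & ~tcs_cross_invocation_reads & ~tcs_indirect_reads &
          ~vs_indirect_writes;
}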
static const gl_shader_stage graphics_shader_order[] = {
- MESA_SHADER_VERTEX,
- MESA_SHADER_TESS_CTRL,
- MESA_SHADER_TESS_EVAL,
- MESA_SHADER_GEOMETRY,
+ MESA_SHADER_VERTEX, MESA_SHADER_TESS_CTRL, MESA_SHADER_TESS_EVAL, MESA_SHADER_GEOMETRY,
- MESA_SHADER_TASK,
- MESA_SHADER_MESH,
+ MESA_SHADER_TASK, MESA_SHADER_MESH,
};
void
struct radv_pipeline_stage *stages)
{
/* Walk backwards to link */
- struct radv_pipeline_stage *next_stage =
- stages[MESA_SHADER_FRAGMENT].nir ? &stages[MESA_SHADER_FRAGMENT] : NULL;
+ struct radv_pipeline_stage *next_stage = stages[MESA_SHADER_FRAGMENT].nir ? &stages[MESA_SHADER_FRAGMENT] : NULL;
for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; i--) {
gl_shader_stage s = graphics_shader_order[i];
/* Merge shader info for VS+GS or TES+GS. */
if (stages[MESA_SHADER_GEOMETRY].nir) {
- gl_shader_stage pre_stage =
- stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
+ gl_shader_stage pre_stage = stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
radv_nir_shader_info_merge(&stages[pre_stage], &stages[MESA_SHADER_GEOMETRY]);
}
{
struct radeon_winsys *ws = device->ws;
   uint64_t size = 32 * 1024 * 1024; /* Default to 32MB. */
- uint16_t sample_interval = 4096; /* Default to 4096 clk. */
+ uint16_t sample_interval = 4096; /* Default to 4096 clk. */
VkResult result;
device->spm.buffer_size = size;
device->spm.sample_interval = sample_interval;
struct radeon_winsys_bo *bo = NULL;
- result = ws->buffer_create(
- ws, size, 4096, RADEON_DOMAIN_VRAM,
- RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
- RADV_BO_PRIORITY_SCRATCH, 0, &bo);
+ result = ws->buffer_create(ws, size, 4096, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
+ RADV_BO_PRIORITY_SCRATCH, 0, &bo);
device->spm.bo = bo;
if (result != VK_SUCCESS)
return false;
}
/* Restore global broadcasting. */
- radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
- S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
+ radeon_set_uconfig_reg(
+ cs, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1));
}
void
/* Configure the SPM ring buffer. */
radeon_set_uconfig_reg(cs, R_037200_RLC_SPM_PERFMON_CNTL,
- S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */
- S_037200_PERFMON_SAMPLE_INTERVAL(spm->sample_interval)); /* in sclk */
+ S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */
+ S_037200_PERFMON_SAMPLE_INTERVAL(spm->sample_interval)); /* in sclk */
radeon_set_uconfig_reg(cs, R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va);
- radeon_set_uconfig_reg(cs, R_037208_RLC_SPM_PERFMON_RING_BASE_HI,
- S_037208_RING_BASE_HI(va >> 32));
+ radeon_set_uconfig_reg(cs, R_037208_RLC_SPM_PERFMON_RING_BASE_HI, S_037208_RING_BASE_HI(va >> 32));
radeon_set_uconfig_reg(cs, R_03720C_RLC_SPM_PERFMON_RING_SIZE, ring_size);
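   /* E.g. (illustrative) a ring base va of 0x123456000 programs
    * RLC_SPM_PERFMON_RING_BASE_LO with 0x23456000 and RING_BASE_HI with
    * S_037208_RING_BASE_HI(0x1), i.e. lo = (uint32_t)va, hi = va >> 32. */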
/* Configure the muxsel. */
radeon_set_uconfig_reg(cs, R_03726C_RLC_SPM_ACCUM_MODE, 0);
radeon_set_uconfig_reg(cs, R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0);
- radeon_set_uconfig_reg(cs, R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
- S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[0]) |
- S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[1]) |
- S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[2]) |
- S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[3]));
- radeon_set_uconfig_reg(cs, R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
- S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) |
- S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[4]));
+ radeon_set_uconfig_reg(
+ cs, R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
+ S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[0]) | S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[1]) |
+ S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[2]) | S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[3]));
+ radeon_set_uconfig_reg(
+ cs, R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
+ S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) | S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[4]));
/* Upload each muxsel ram to the RLC. */
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
unsigned rlc_muxsel_addr, rlc_muxsel_data;
- unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) |
- S_030800_INSTANCE_BROADCAST_WRITES(1);
+ unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1);
if (!spm->num_muxsel_lines[s])
continue;
/* Write the muxsel line configuration with MUXSEL_DATA. */
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_ME) |
- S_370_WR_ONE_ADDR(1));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME) |
+ S_370_WR_ONE_ADDR(1));
radeon_emit(cs, rlc_muxsel_data >> 2);
radeon_emit(cs, 0);
radeon_emit_array(cs, data, AC_SPM_MUXSEL_LINE_SIZE);
const struct radeon_info *info = &device->physical_device->rad_info;
struct ac_perfcounters *pc = &device->physical_device->ac_perfcounters;
struct ac_spm_counter_create_info spm_counters[] = {
- {TCP, 0, 0x9}, /* Number of L2 requests. */
- {TCP, 0, 0x12}, /* Number of L2 misses. */
- {SQ, 0, 0x14f}, /* Number of SCACHE hits. */
- {SQ, 0, 0x150}, /* Number of SCACHE misses. */
- {SQ, 0, 0x151}, /* Number of SCACHE misses duplicate. */
- {SQ, 0, 0x12c}, /* Number of ICACHE hits. */
- {SQ, 0, 0x12d}, /* Number of ICACHE misses. */
- {SQ, 0, 0x12e}, /* Number of ICACHE misses duplicate. */
- {GL1C, 0, 0xe}, /* Number of GL1C requests. */
- {GL1C, 0, 0x12}, /* Number of GL1C misses. */
- {GL2C, 0, 0x3}, /* Number of GL2C requests. */
- {GL2C, 0, info->gfx_level >= GFX10_3 ? 0x2b : 0x23}, /* Number of GL2C misses. */
+ {TCP, 0, 0x9}, /* Number of L2 requests. */
+ {TCP, 0, 0x12}, /* Number of L2 misses. */
+ {SQ, 0, 0x14f}, /* Number of SCACHE hits. */
+ {SQ, 0, 0x150}, /* Number of SCACHE misses. */
+ {SQ, 0, 0x151}, /* Number of SCACHE misses duplicate. */
+ {SQ, 0, 0x12c}, /* Number of ICACHE hits. */
+ {SQ, 0, 0x12d}, /* Number of ICACHE misses. */
+ {SQ, 0, 0x12e}, /* Number of ICACHE misses duplicate. */
+ {GL1C, 0, 0xe}, /* Number of GL1C requests. */
+ {GL1C, 0, 0x12}, /* Number of GL1C misses. */
+ {GL2C, 0, 0x3}, /* Number of GL2C requests. */
+ {GL2C, 0, info->gfx_level >= GFX10_3 ? 0x2b : 0x23}, /* Number of GL2C misses. */
};
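   /* The request/miss pairs above lend themselves to derived metrics when
    * post-processing SPM samples, e.g. (illustrative)
    * l2_hit_rate = requests ? 1.0 - misses / (double)requests : 0.0; */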
/* We failed to initialize the performance counters. */
static uint32_t
gfx11_get_sqtt_ctrl(const struct radv_device *device, bool enable)
{
- return S_0367B0_MODE(enable) | S_0367B0_HIWATER(5) | S_0367B0_UTIL_TIMER(1) |
- S_0367B0_RT_FREQ(2) | /* 4096 clk */
- S_0367B0_DRAW_EVENT_EN(1) | S_0367B0_SPI_STALL_EN(1) | S_0367B0_SQ_STALL_EN(1) |
- S_0367B0_REG_AT_HWM(2);
+ return S_0367B0_MODE(enable) | S_0367B0_HIWATER(5) | S_0367B0_UTIL_TIMER(1) | S_0367B0_RT_FREQ(2) | /* 4096 clk */
+ S_0367B0_DRAW_EVENT_EN(1) | S_0367B0_SPI_STALL_EN(1) | S_0367B0_SQ_STALL_EN(1) | S_0367B0_REG_AT_HWM(2);
}
static uint32_t
{
uint32_t sqtt_ctrl = S_008D1C_MODE(enable) | S_008D1C_HIWATER(5) | S_008D1C_UTIL_TIMER(1) |
S_008D1C_RT_FREQ(2) | /* 4096 clk */
- S_008D1C_DRAW_EVENT_EN(1) | S_008D1C_REG_STALL_EN(1) |
- S_008D1C_SPI_STALL_EN(1) | S_008D1C_SQ_STALL_EN(1) |
- S_008D1C_REG_DROP_ON_STALL(0);
+ S_008D1C_DRAW_EVENT_EN(1) | S_008D1C_REG_STALL_EN(1) | S_008D1C_SPI_STALL_EN(1) |
+ S_008D1C_SQ_STALL_EN(1) | S_008D1C_REG_DROP_ON_STALL(0);
if (device->physical_device->rad_info.gfx_level == GFX10_3)
sqtt_ctrl |= S_008D1C_LOWATER_OFFSET(4);
si_cs_emit_cache_flush(
device->ws, cs, device->physical_device->rad_info.gfx_level, NULL, 0,
family == AMD_IP_COMPUTE && device->physical_device->rad_info.gfx_level >= GFX7,
- (family == RADV_QUEUE_COMPUTE
- ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH
- : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
- RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE |
- RADV_CMD_FLAG_INV_L2,
+ (family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH
+ : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
+ RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2,
&sqtt_flush_bits, 0);
}
static void
-radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
- enum radv_queue_family qf)
+radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
{
uint32_t shifted_size = device->sqtt.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
const struct radeon_info *rad_info = &device->physical_device->rad_info;
continue;
/* Target SEx and SH0. */
- radeon_set_uconfig_reg(
- cs, R_030800_GRBM_GFX_INDEX,
- S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1));
+ radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1));
if (device->physical_device->rad_info.gfx_level >= GFX11) {
/* Order seems important for the following 2 registers. */
radeon_set_uconfig_reg(cs, R_0367B4_SQ_THREAD_TRACE_MASK,
S_0367B4_WTYPE_INCLUDE(0x7f) | /* all shader stages */
- S_0367B4_SA_SEL(0) | S_0367B4_WGP_SEL(first_active_cu / 2) |
- S_0367B4_SIMD_SEL(0));
+ S_0367B4_SA_SEL(0) | S_0367B4_WGP_SEL(first_active_cu / 2) | S_0367B4_SIMD_SEL(0));
- uint32_t sqtt_token_mask = S_0367B8_REG_INCLUDE(
- V_0367B8_REG_INCLUDE_SQDEC | V_0367B8_REG_INCLUDE_SHDEC | V_0367B8_REG_INCLUDE_GFXUDEC |
- V_0367B8_REG_INCLUDE_COMP | V_0367B8_REG_INCLUDE_CONTEXT | V_0367B8_REG_INCLUDE_CONFIG);
+ uint32_t sqtt_token_mask = S_0367B8_REG_INCLUDE(V_0367B8_REG_INCLUDE_SQDEC | V_0367B8_REG_INCLUDE_SHDEC |
+ V_0367B8_REG_INCLUDE_GFXUDEC | V_0367B8_REG_INCLUDE_COMP |
+ V_0367B8_REG_INCLUDE_CONTEXT | V_0367B8_REG_INCLUDE_CONFIG);
/* Performance counters with SQTT are considered deprecated. */
uint32_t token_exclude = V_0367B8_TOKEN_EXCLUDE_PERF;
radeon_set_uconfig_reg(cs, R_0367B8_SQ_THREAD_TRACE_TOKEN_MASK, sqtt_token_mask);
/* Should be emitted last (it enables thread traces). */
- radeon_set_uconfig_reg(cs, R_0367B0_SQ_THREAD_TRACE_CTRL,
- gfx11_get_sqtt_ctrl(device, true));
+ radeon_set_uconfig_reg(cs, R_0367B0_SQ_THREAD_TRACE_CTRL, gfx11_get_sqtt_ctrl(device, true));
} else if (device->physical_device->rad_info.gfx_level >= GFX10) {
/* Order seems important for the following 2 registers. */
- radeon_set_privileged_config_reg(
- cs, R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
- S_008D04_SIZE(shifted_size) | S_008D04_BASE_HI(shifted_va >> 32));
+ radeon_set_privileged_config_reg(cs, R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
+ S_008D04_SIZE(shifted_size) | S_008D04_BASE_HI(shifted_va >> 32));
radeon_set_privileged_config_reg(cs, R_008D00_SQ_THREAD_TRACE_BUF0_BASE, shifted_va);
- radeon_set_privileged_config_reg(
- cs, R_008D14_SQ_THREAD_TRACE_MASK,
- S_008D14_WTYPE_INCLUDE(0x7f) | /* all shader stages */
- S_008D14_SA_SEL(0) | S_008D14_WGP_SEL(first_active_cu / 2) | S_008D14_SIMD_SEL(0));
+ radeon_set_privileged_config_reg(cs, R_008D14_SQ_THREAD_TRACE_MASK,
+ S_008D14_WTYPE_INCLUDE(0x7f) | /* all shader stages */
+ S_008D14_SA_SEL(0) | S_008D14_WGP_SEL(first_active_cu / 2) |
+ S_008D14_SIMD_SEL(0));
- uint32_t sqtt_token_mask = S_008D18_REG_INCLUDE(
- V_008D18_REG_INCLUDE_SQDEC | V_008D18_REG_INCLUDE_SHDEC | V_008D18_REG_INCLUDE_GFXUDEC |
- V_008D18_REG_INCLUDE_COMP | V_008D18_REG_INCLUDE_CONTEXT | V_008D18_REG_INCLUDE_CONFIG);
+ uint32_t sqtt_token_mask = S_008D18_REG_INCLUDE(V_008D18_REG_INCLUDE_SQDEC | V_008D18_REG_INCLUDE_SHDEC |
+ V_008D18_REG_INCLUDE_GFXUDEC | V_008D18_REG_INCLUDE_COMP |
+ V_008D18_REG_INCLUDE_CONTEXT | V_008D18_REG_INCLUDE_CONFIG);
/* Performance counters with SQTT are considered deprecated. */
uint32_t token_exclude = V_008D18_TOKEN_EXCLUDE_PERF;
if (!radv_is_instruction_timing_enabled()) {
/* Reduce SQTT traffic when instruction timing isn't enabled. */
- token_exclude |= V_008D18_TOKEN_EXCLUDE_VMEMEXEC |
- V_008D18_TOKEN_EXCLUDE_ALUEXEC |
- V_008D18_TOKEN_EXCLUDE_VALUINST |
- V_008D18_TOKEN_EXCLUDE_IMMEDIATE |
+ token_exclude |= V_008D18_TOKEN_EXCLUDE_VMEMEXEC | V_008D18_TOKEN_EXCLUDE_ALUEXEC |
+ V_008D18_TOKEN_EXCLUDE_VALUINST | V_008D18_TOKEN_EXCLUDE_IMMEDIATE |
V_008D18_TOKEN_EXCLUDE_INST;
}
sqtt_token_mask |= S_008D18_TOKEN_EXCLUDE(token_exclude);
radeon_set_privileged_config_reg(cs, R_008D18_SQ_THREAD_TRACE_TOKEN_MASK, sqtt_token_mask);
/* Should be emitted last (it enables thread traces). */
- radeon_set_privileged_config_reg(cs, R_008D1C_SQ_THREAD_TRACE_CTRL,
- gfx10_get_sqtt_ctrl(device, true));
+ radeon_set_privileged_config_reg(cs, R_008D1C_SQ_THREAD_TRACE_CTRL, gfx10_get_sqtt_ctrl(device, true));
} else {
/* Order seems important for the following 4 registers. */
- radeon_set_uconfig_reg(cs, R_030CDC_SQ_THREAD_TRACE_BASE2,
- S_030CDC_ADDR_HI(shifted_va >> 32));
+ radeon_set_uconfig_reg(cs, R_030CDC_SQ_THREAD_TRACE_BASE2, S_030CDC_ADDR_HI(shifted_va >> 32));
radeon_set_uconfig_reg(cs, R_030CC0_SQ_THREAD_TRACE_BASE, shifted_va);
radeon_set_uconfig_reg(cs, R_030CD4_SQ_THREAD_TRACE_CTRL, S_030CD4_RESET_BUFFER(1));
- uint32_t sqtt_mask = S_030CC8_CU_SEL(first_active_cu) | S_030CC8_SH_SEL(0) |
- S_030CC8_SIMD_EN(0xf) | S_030CC8_VM_ID_MASK(0) |
- S_030CC8_REG_STALL_EN(1) | S_030CC8_SPI_STALL_EN(1) |
+ uint32_t sqtt_mask = S_030CC8_CU_SEL(first_active_cu) | S_030CC8_SH_SEL(0) | S_030CC8_SIMD_EN(0xf) |
+ S_030CC8_VM_ID_MASK(0) | S_030CC8_REG_STALL_EN(1) | S_030CC8_SPI_STALL_EN(1) |
S_030CC8_SQ_STALL_EN(1);
if (device->physical_device->rad_info.gfx_level < GFX9) {
radeon_set_uconfig_reg(cs, R_030CC8_SQ_THREAD_TRACE_MASK, sqtt_mask);
/* Trace all tokens and registers. */
- radeon_set_uconfig_reg(
- cs, R_030CCC_SQ_THREAD_TRACE_TOKEN_MASK,
- S_030CCC_TOKEN_MASK(0xbfff) | S_030CCC_REG_MASK(0xff) | S_030CCC_REG_DROP_ON_STALL(0));
+ radeon_set_uconfig_reg(cs, R_030CCC_SQ_THREAD_TRACE_TOKEN_MASK,
+ S_030CCC_TOKEN_MASK(0xbfff) | S_030CCC_REG_MASK(0xff) | S_030CCC_REG_DROP_ON_STALL(0));
/* Enable SQTT perf counters for all CUs. */
radeon_set_uconfig_reg(cs, R_030CD0_SQ_THREAD_TRACE_PERF_MASK,
}
/* Enable the thread trace mode. */
- uint32_t sqtt_mode =
- S_030CD8_MASK_PS(1) | S_030CD8_MASK_VS(1) | S_030CD8_MASK_GS(1) | S_030CD8_MASK_ES(1) |
- S_030CD8_MASK_HS(1) | S_030CD8_MASK_LS(1) | S_030CD8_MASK_CS(1) |
- S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */
- S_030CD8_MODE(1);
+ uint32_t sqtt_mode = S_030CD8_MASK_PS(1) | S_030CD8_MASK_VS(1) | S_030CD8_MASK_GS(1) | S_030CD8_MASK_ES(1) |
+ S_030CD8_MASK_HS(1) | S_030CD8_MASK_LS(1) | S_030CD8_MASK_CS(1) |
+ S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */
+ S_030CD8_MODE(1);
if (device->physical_device->rad_info.gfx_level == GFX9) {
/* Count SQTT traffic in TCC perf counters. */
}
/* Restore global broadcasting. */
- radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
- S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
+ radeon_set_uconfig_reg(
+ cs, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1));
/* Start the thread trace with a different event based on the queue. */
if (qf == RADV_QUEUE_COMPUTE) {
/* Copy back the info struct one DWORD at a time. */
for (unsigned i = 0; i < 3; i++) {
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_TC_L2) |
- COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_TC_L2) | COPY_DATA_WR_CONFIRM);
radeon_emit(cs, sqtt_info_regs[i] >> 2);
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, (info_va + i * 4));
continue;
/* Target SEi and SH0. */
- radeon_set_uconfig_reg(
- cs, R_030800_GRBM_GFX_INDEX,
- S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1));
+ radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1));
if (device->physical_device->rad_info.gfx_level >= GFX11) {
/* Make sure to wait for the trace buffer. */
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(
- cs,
- WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */
+ radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL); /* wait until the register differs from the reference value */
radeon_emit(cs, R_0367D0_SQ_THREAD_TRACE_STATUS >> 2); /* register */
radeon_emit(cs, 0);
- radeon_emit(cs, 0); /* reference value */
+ radeon_emit(cs, 0); /* reference value */
radeon_emit(cs, ~C_0367D0_FINISH_DONE);
- radeon_emit(cs, 4); /* poll interval */
+ radeon_emit(cs, 4); /* poll interval */
/* Disable the thread trace mode. */
- radeon_set_uconfig_reg(cs, R_0367B0_SQ_THREAD_TRACE_CTRL,
- gfx11_get_sqtt_ctrl(device, false));
+ radeon_set_uconfig_reg(cs, R_0367B0_SQ_THREAD_TRACE_CTRL, gfx11_get_sqtt_ctrl(device, false));
/* Wait for thread trace completion. */
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(
- cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
+ radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
radeon_emit(cs, R_0367D0_SQ_THREAD_TRACE_STATUS >> 2); /* register */
radeon_emit(cs, 0);
radeon_emit(cs, 0); /* reference value */
if (!device->physical_device->rad_info.has_sqtt_rb_harvest_bug) {
/* Make sure to wait for the trace buffer. */
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(
- cs,
- WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */
+ radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL); /* wait until the register differs from the reference value */
radeon_emit(cs, R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */
radeon_emit(cs, 0);
- radeon_emit(cs, 0); /* reference value */
+ radeon_emit(cs, 0); /* reference value */
radeon_emit(cs, ~C_008D20_FINISH_DONE);
- radeon_emit(cs, 4); /* poll interval */
+ radeon_emit(cs, 4); /* poll interval */
}
/* Disable the thread trace mode. */
- radeon_set_privileged_config_reg(cs, R_008D1C_SQ_THREAD_TRACE_CTRL,
- gfx10_get_sqtt_ctrl(device, false));
+ radeon_set_privileged_config_reg(cs, R_008D1C_SQ_THREAD_TRACE_CTRL, gfx10_get_sqtt_ctrl(device, false));
/* Wait for thread trace completion. */
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(
- cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
+ radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
radeon_emit(cs, R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */
radeon_emit(cs, 0);
- radeon_emit(cs, 0); /* reference value */
+ radeon_emit(cs, 0); /* reference value */
radeon_emit(cs, ~C_008D20_BUSY); /* mask */
- radeon_emit(cs, 4); /* poll interval */
+ radeon_emit(cs, 4); /* poll interval */
} else {
/* Disable the thread trace mode. */
radeon_set_uconfig_reg(cs, R_030CD8_SQ_THREAD_TRACE_MODE, S_030CD8_MODE(0));
/* Wait for thread trace completion. */
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(
- cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
+ radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
radeon_emit(cs, R_030CE8_SQ_THREAD_TRACE_STATUS >> 2); /* register */
radeon_emit(cs, 0);
- radeon_emit(cs, 0); /* reference value */
+ radeon_emit(cs, 0); /* reference value */
radeon_emit(cs, ~C_030CE8_BUSY); /* mask */
- radeon_emit(cs, 4); /* poll interval */
+ radeon_emit(cs, 4); /* poll interval */
}
radv_copy_sqtt_info_regs(device, cs, se);
}
/* Restore global broadcasting. */
- radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
- S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
+ radeon_set_uconfig_reg(
+ cs, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1));
}
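/* Sketch of the condition the CP evaluates for the WAIT_REG_MEM packets
 * emitted above (illustrative): the register is re-polled every poll
 * interval until the masked value satisfies the comparison. */
static bool
sketch_wait_reg_mem_done(uint32_t reg_value, uint32_t mask, uint32_t reference, bool wait_equal)
{
   uint32_t masked = reg_value & mask;
   return wait_equal ? masked == reference : masked != reference;
}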
void
radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf *cs, bool enable)
{
if (device->physical_device->rad_info.gfx_level >= GFX9) {
- uint32_t spi_config_cntl =
- S_031100_GPR_WRITE_PRIORITY(0x2c688) | S_031100_EXP_PRIORITY_ORDER(3) |
- S_031100_ENABLE_SQG_TOP_EVENTS(enable) | S_031100_ENABLE_SQG_BOP_EVENTS(enable);
+ uint32_t spi_config_cntl = S_031100_GPR_WRITE_PRIORITY(0x2c688) | S_031100_EXP_PRIORITY_ORDER(3) |
+ S_031100_ENABLE_SQG_TOP_EVENTS(enable) | S_031100_ENABLE_SQG_BOP_EVENTS(enable);
if (device->physical_device->rad_info.gfx_level >= GFX10)
spi_config_cntl |= S_031100_PS_PKR_PRIORITY_CNTL(3);
radeon_set_uconfig_reg(cs, R_031100_SPI_CONFIG_CNTL, spi_config_cntl);
} else {
/* SPI_CONFIG_CNTL is a protected register on GFX6-GFX8. */
- radeon_set_privileged_config_reg(
- cs, R_009100_SPI_CONFIG_CNTL,
- S_009100_ENABLE_SQG_TOP_EVENTS(enable) | S_009100_ENABLE_SQG_BOP_EVENTS(enable));
+ radeon_set_privileged_config_reg(cs, R_009100_SPI_CONFIG_CNTL,
+ S_009100_ENABLE_SQG_TOP_EVENTS(enable) | S_009100_ENABLE_SQG_BOP_EVENTS(enable));
}
}
return; /* not needed */
if (device->physical_device->rad_info.gfx_level >= GFX10) {
- radeon_set_uconfig_reg(cs, R_037390_RLC_PERFMON_CLK_CNTL,
- S_037390_PERFMON_CLOCK_STATE(inhibit));
+ radeon_set_uconfig_reg(cs, R_037390_RLC_PERFMON_CLK_CNTL, S_037390_PERFMON_CLOCK_STATE(inhibit));
} else if (device->physical_device->rad_info.gfx_level >= GFX8) {
- radeon_set_uconfig_reg(cs, R_0372FC_RLC_PERFMON_CLK_CNTL,
- S_0372FC_PERFMON_CLOCK_STATE(inhibit));
+ radeon_set_uconfig_reg(cs, R_0372FC_RLC_PERFMON_CLK_CNTL, S_0372FC_PERFMON_CLOCK_STATE(inhibit));
}
}
size += device->sqtt.buffer_size * (uint64_t)max_se;
struct radeon_winsys_bo *bo = NULL;
- result = ws->buffer_create(
- ws, size, 4096, RADEON_DOMAIN_VRAM,
- RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
- RADV_BO_PRIORITY_SCRATCH, 0, &bo);
+ result = ws->buffer_create(ws, size, 4096, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
+ RADV_BO_PRIORITY_SCRATCH, 0, &bo);
device->sqtt.bo = bo;
if (result != VK_SUCCESS)
return false;
/* Destroy queue info record. */
simple_mtx_lock(&queue_info->lock);
if (queue_info->record_count > 0) {
- list_for_each_entry_safe(struct rgp_queue_info_record, record, &queue_info->record, list)
- {
+ list_for_each_entry_safe (struct rgp_queue_info_record, record, &queue_info->record, list) {
if (record->queue_id == (uintptr_t)queue) {
queue_info->record_count--;
list_del(&record->list);
struct ac_sqtt *sqtt = &device->sqtt;
/* Default buffer size set to 32MB per SE. */
- device->sqtt.buffer_size =
- (uint32_t)debug_get_num_option("RADV_THREAD_TRACE_BUFFER_SIZE", 32 * 1024 * 1024);
+ device->sqtt.buffer_size = (uint32_t)debug_get_num_option("RADV_THREAD_TRACE_BUFFER_SIZE", 32 * 1024 * 1024);
device->sqtt.start_frame = (int)debug_get_num_option("RADV_THREAD_TRACE", -1);
const char *trigger_file = getenv("RADV_THREAD_TRACE_TRIGGER");
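   /* Usage sketch (values illustrative): capture frame 500 with a 64 MiB
    * per-SE buffer, or arm a trigger file that is checked once per frame:
    *
    *   RADV_THREAD_TRACE=500 RADV_THREAD_TRACE_BUFFER_SIZE=67108864 ./app
    *   RADV_THREAD_TRACE_TRIGGER=/tmp/trigger ./app
    */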
/* Clear clock calibration records. */
simple_mtx_lock(&clock_calibration->lock);
- list_for_each_entry_safe(struct rgp_clock_calibration_record, record, &clock_calibration->record,
- list)
- {
+ list_for_each_entry_safe (struct rgp_clock_calibration_record, record, &clock_calibration->record, list) {
clock_calibration->record_count--;
list_del(&record->list);
free(record);
}
static VkResult
-radv_get_calibrated_timestamps(struct radv_device *device, uint64_t *cpu_timestamp,
- uint64_t *gpu_timestamp)
+radv_get_calibrated_timestamps(struct radv_device *device, uint64_t *cpu_timestamp, uint64_t *gpu_timestamp)
{
uint64_t timestamps[2];
uint64_t max_deviation;
VkResult result;
- const VkCalibratedTimestampInfoEXT timestamp_infos[2] = {
- {
- .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
- .timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
- },
- {
- .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
- .timeDomain = VK_TIME_DOMAIN_DEVICE_EXT,
- }
- };
-
- result = radv_GetCalibratedTimestampsEXT(radv_device_to_handle(device), 2, timestamp_infos,
- timestamps, &max_deviation);
+ const VkCalibratedTimestampInfoEXT timestamp_infos[2] = {{
+ .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
+ .timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
+ .timeDomain = VK_TIME_DOMAIN_DEVICE_EXT,
+ }};
+
+ result =
+ radv_GetCalibratedTimestampsEXT(radv_device_to_handle(device), 2, timestamp_infos, timestamps, &max_deviation);
if (result != VK_SUCCESS)
return result;
#include "radv_private.h"
#include "vk_video/vulkan_video_codecs_common.h"
-#include "ac_vcn_dec.h"
#include "ac_uvd_dec.h"
+#include "ac_vcn_dec.h"
#include "radv_cs.h"
#include "radv_debug.h"
-#define NUM_H264_REFS 17
-#define NUM_H265_REFS 8
+#define NUM_H264_REFS 17
+#define NUM_H265_REFS 8
#define FB_BUFFER_OFFSET 0x1000
#define FB_BUFFER_SIZE 2048
-#define FB_BUFFER_SIZE_TONGA (2048 * 64)
+#define FB_BUFFER_SIZE_TONGA (2048 * 64)
#define IT_SCALING_TABLE_SIZE 992
#define RDECODE_SESSION_CONTEXT_SIZE (128 * 1024)
/* Not 100% sure this isn't too much but works */
#define VID_DEFAULT_ALIGNMENT 256
-const int vl_zscan_h265_up_right_diagonal_16[] =
-{
+const int vl_zscan_h265_up_right_diagonal_16[] = {
/* Up-right diagonal scan order for 4x4 blocks - see H.265 section 6.5.3. */
- 0, 4, 1, 8, 5, 2, 12, 9,
- 6, 3, 13, 10, 7, 14, 11, 15,
+ 0, 4, 1, 8, 5, 2, 12, 9, 6, 3, 13, 10, 7, 14, 11, 15,
};
-const int vl_zscan_h265_up_right_diagonal[] =
-{
+const int vl_zscan_h265_up_right_diagonal[] = {
/* Up-right diagonal scan order for 8x8 blocks - see H.265 section 6.5.3. */
- 0, 8, 1, 16, 9, 2, 24, 17,
- 10, 3, 32, 25, 18, 11, 4, 40,
- 33, 26, 19, 12, 5, 48, 41, 34,
- 27, 20, 13, 6, 56, 49, 42, 35,
- 28, 21, 14, 7, 57, 50, 43, 36,
- 29, 22, 15, 58, 51, 44, 37, 30,
- 23, 59, 52, 45, 38, 31, 60, 53,
- 46, 39, 61, 54, 47, 62, 55, 63,
+ 0, 8, 1, 16, 9, 2, 24, 17, 10, 3, 32, 25, 18, 11, 4, 40, 33, 26, 19, 12, 5, 48,
+ 41, 34, 27, 20, 13, 6, 56, 49, 42, 35, 28, 21, 14, 7, 57, 50, 43, 36, 29, 22, 15, 58,
+ 51, 44, 37, 30, 23, 59, 52, 45, 38, 31, 60, 53, 46, 39, 61, 54, 47, 62, 55, 63,
};
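/* How an up-right diagonal order can be derived for an NxN block (see H.265
 * section 6.5.3); this sketch reproduces the precomputed tables above: */
static void
sketch_up_right_diagonal(int n, int *out /* receives n * n entries */)
{
   int idx = 0;
   for (int d = 0; d <= 2 * (n - 1); d++)  /* walk the anti-diagonals */
      for (int y = n - 1; y >= 0; y--) {   /* bottom-left to top-right */
         int x = d - y;
         if (x >= 0 && x < n)
            out[idx++] = y * n + x;
      }
}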
static bool
radv_enable_tier2(struct radv_physical_device *pdevice)
{
- if (pdevice->rad_info.family >= CHIP_NAVI21 &&
- !(pdevice->instance->debug_flags & RADV_DEBUG_VIDEO_ARRAY_PATH))
+ if (pdevice->rad_info.family >= CHIP_NAVI21 && !(pdevice->instance->debug_flags & RADV_DEBUG_VIDEO_ARRAY_PATH))
return true;
return false;
}
static bool
-radv_vid_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size,
- unsigned *out_offset, void **ptr)
+radv_vid_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned *out_offset, void **ptr)
{
- return radv_cmd_buffer_upload_alloc_aligned(cmd_buffer, size, VID_DEFAULT_ALIGNMENT,
- out_offset, ptr);
+ return radv_cmd_buffer_upload_alloc_aligned(cmd_buffer, size, VID_DEFAULT_ALIGNMENT, out_offset, ptr);
}
/* vcn unified queue (sq) ib header */
static void
-radv_vcn_sq_header(struct radeon_cmdbuf *cs,
- struct rvcn_sq_var *sq,
- bool enc)
+radv_vcn_sq_header(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq, bool enc)
{
/* vcn ib signature */
radeon_emit(cs, RADEON_VCN_SIGNATURE_SIZE);
/* vcn ib engine info */
radeon_emit(cs, RADEON_VCN_ENGINE_INFO_SIZE);
radeon_emit(cs, RADEON_VCN_ENGINE_INFO);
- radeon_emit(cs, enc ? RADEON_VCN_ENGINE_TYPE_ENCODE
- : RADEON_VCN_ENGINE_TYPE_DECODE);
+ radeon_emit(cs, enc ? RADEON_VCN_ENGINE_TYPE_ENCODE : RADEON_VCN_ENGINE_TYPE_DECODE);
radeon_emit(cs, 0);
}
static void
-radv_vcn_sq_tail(struct radeon_cmdbuf *cs,
- struct rvcn_sq_var *sq)
+radv_vcn_sq_tail(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq)
{
uint32_t *end;
uint32_t size_in_dw;
}
/* generate a stream handle */
-static
-unsigned si_vid_alloc_stream_handle(struct radv_physical_device *pdevice)
+static unsigned
+si_vid_alloc_stream_handle(struct radv_physical_device *pdevice)
{
unsigned stream_handle = pdevice->stream_handle_base;
void
radv_init_physical_device_decoder(struct radv_physical_device *pdevice)
{
- if (pdevice->rad_info.family >= CHIP_GFX1100 ||
- pdevice->rad_info.family == CHIP_GFX940)
+ if (pdevice->rad_info.family >= CHIP_GFX1100 || pdevice->rad_info.family == CHIP_GFX940)
pdevice->vid_decode_ip = AMD_IP_VCN_UNIFIED;
else if (radv_has_uvd(pdevice))
pdevice->vid_decode_ip = AMD_IP_UVD;
}
}
-static bool have_it(struct radv_video_session *vid)
+static bool
+have_it(struct radv_video_session *vid)
{
return vid->stream_type == RDECODE_CODEC_H264_PERF || vid->stream_type == RDECODE_CODEC_H265;
}
-static unsigned calc_ctx_size_h264_perf(struct radv_video_session *vid)
+static unsigned
+calc_ctx_size_h264_perf(struct radv_video_session *vid)
{
unsigned width_in_mb, height_in_mb, ctx_size;
unsigned width = align(vid->vk.max_coded.width, VL_MACROBLOCK_WIDTH);
return ctx_size;
}
-static unsigned calc_ctx_size_h265_main(struct radv_video_session *vid)
+static unsigned
+calc_ctx_size_h265_main(struct radv_video_session *vid)
{
unsigned width = align(vid->vk.max_coded.width, VL_MACROBLOCK_WIDTH);
unsigned height = align(vid->vk.max_coded.height, VL_MACROBLOCK_HEIGHT);
return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024;
}
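/* Worked example (illustrative): 1920x1088 with 8 references gives
 * ((1920 + 255) / 16) * ((1088 + 255) / 16) * 16 * 8 = 135 * 83 * 16 * 8
 * = 1434240 bytes, plus 52 * 1024 = 53248 bytes of fixed overhead,
 * i.e. roughly 1.42 MiB of context. */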
-static unsigned calc_ctx_size_h265_main10(struct radv_video_session *vid)
+static unsigned
+calc_ctx_size_h265_main10(struct radv_video_session *vid)
{
unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb;
unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size;
}
VkResult
-radv_CreateVideoSessionKHR(VkDevice _device,
- const VkVideoSessionCreateInfoKHR *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
- VkVideoSessionKHR *pVideoSession)
+radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkVideoSessionKHR *pVideoSession)
{
RADV_FROM_HANDLE(radv_device, device, _device);
memset(vid, 0, sizeof(struct radv_video_session));
- VkResult result = vk_video_session_init(&device->vk,
- &vid->vk,
- pCreateInfo);
+ VkResult result = vk_video_session_init(&device->vk, &vid->vk, pCreateInfo);
if (result != VK_SUCCESS) {
vk_free2(&device->vk.alloc, pAllocator, vid);
return result;
vid->stream_handle = si_vid_alloc_stream_handle(device->physical_device);
vid->dbg_frame_cnt = 0;
- vid->db_alignment = (device->physical_device->rad_info.family >= CHIP_RENOIR &&
- vid->vk.max_coded.width > 32 &&
- (vid->stream_type == RDECODE_CODEC_H265 &&
- vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)) ? 64 : 32;
+ vid->db_alignment =
+ (device->physical_device->rad_info.family >= CHIP_RENOIR && vid->vk.max_coded.width > 32 &&
+ (vid->stream_type == RDECODE_CODEC_H265 && vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10))
+ ? 64
+ : 32;
*pVideoSession = radv_video_session_to_handle(vid);
return VK_SUCCESS;
}
void
-radv_DestroyVideoSessionKHR(VkDevice _device,
- VkVideoSessionKHR _session,
- const VkAllocationCallbacks *pAllocator)
+radv_DestroyVideoSessionKHR(VkDevice _device, VkVideoSessionKHR _session, const VkAllocationCallbacks *pAllocator)
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_video_session, vid, _session);
vk_free2(&device->vk.alloc, pAllocator, vid);
}
-
VkResult
-radv_CreateVideoSessionParametersKHR(VkDevice _device,
- const VkVideoSessionParametersCreateInfoKHR *pCreateInfo,
+radv_CreateVideoSessionParametersKHR(VkDevice _device, const VkVideoSessionParametersCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkVideoSessionParametersKHR *pVideoSessionParameters)
{
if (!params)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- VkResult result = vk_video_session_parameters_init(&device->vk,
- ¶ms->vk,
- &vid->vk,
- templ ? &templ->vk : NULL,
- pCreateInfo);
+ VkResult result =
+ vk_video_session_parameters_init(&device->vk, ¶ms->vk, &vid->vk, templ ? &templ->vk : NULL, pCreateInfo);
if (result != VK_SUCCESS) {
vk_free2(&device->vk.alloc, pAllocator, params);
return result;
}
void
-radv_DestroyVideoSessionParametersKHR(VkDevice _device,
- VkVideoSessionParametersKHR _params,
+radv_DestroyVideoSessionParametersKHR(VkDevice _device, VkVideoSessionParametersKHR _params,
const VkAllocationCallbacks *pAllocator)
{
RADV_FROM_HANDLE(radv_device, device, _device);
}
VkResult
-radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice,
- const VkVideoProfileInfoKHR *pVideoProfile,
+radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, const VkVideoProfileInfoKHR *pVideoProfile,
VkVideoCapabilitiesKHR *pCapabilities)
{
RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
pCapabilities->minCodedExtent.width = VL_MACROBLOCK_WIDTH;
pCapabilities->minCodedExtent.height = VL_MACROBLOCK_HEIGHT;
- struct VkVideoDecodeCapabilitiesKHR *dec_caps = (struct VkVideoDecodeCapabilitiesKHR *)
- vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_CAPABILITIES_KHR);
+ struct VkVideoDecodeCapabilitiesKHR *dec_caps =
+ (struct VkVideoDecodeCapabilitiesKHR *)vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_CAPABILITIES_KHR);
if (dec_caps)
dec_caps->flags = VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR;
switch (pVideoProfile->videoCodecOperation) {
case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
- struct VkVideoDecodeH264CapabilitiesKHR *ext = (struct VkVideoDecodeH264CapabilitiesKHR *)
- vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_H264_CAPABILITIES_KHR);
+ struct VkVideoDecodeH264CapabilitiesKHR *ext = (struct VkVideoDecodeH264CapabilitiesKHR *)vk_find_struct(
+ pCapabilities->pNext, VIDEO_DECODE_H264_CAPABILITIES_KHR);
const struct VkVideoDecodeH264ProfileInfoKHR *h264_profile =
- vk_find_struct_const(pVideoProfile->pNext,
- VIDEO_DECODE_H264_PROFILE_INFO_KHR);
+ vk_find_struct_const(pVideoProfile->pNext, VIDEO_DECODE_H264_PROFILE_INFO_KHR);
if (h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_BASELINE &&
h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_MAIN &&
break;
}
case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: {
- struct VkVideoDecodeH265CapabilitiesKHR *ext = (struct VkVideoDecodeH265CapabilitiesKHR *)
- vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_H265_CAPABILITIES_KHR);
+ struct VkVideoDecodeH265CapabilitiesKHR *ext = (struct VkVideoDecodeH265CapabilitiesKHR *)vk_find_struct(
+ pCapabilities->pNext, VIDEO_DECODE_H265_CAPABILITIES_KHR);
const struct VkVideoDecodeH265ProfileInfoKHR *h265_profile =
- vk_find_struct_const(pVideoProfile->pNext,
- VIDEO_DECODE_H265_PROFILE_INFO_KHR);
+ vk_find_struct_const(pVideoProfile->pNext, VIDEO_DECODE_H265_PROFILE_INFO_KHR);
if (h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN &&
h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN_10 &&
pCapabilities->maxCodedExtent.height = (pdevice->rad_info.family < CHIP_TONGA) ? 1152 : 4096;
break;
case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
- pCapabilities->maxCodedExtent.width = (pdevice->rad_info.family < CHIP_RENOIR) ?
- ((pdevice->rad_info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
- pCapabilities->maxCodedExtent.height = (pdevice->rad_info.family < CHIP_RENOIR) ?
- ((pdevice->rad_info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
+ pCapabilities->maxCodedExtent.width =
+ (pdevice->rad_info.family < CHIP_RENOIR) ? ((pdevice->rad_info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
+ pCapabilities->maxCodedExtent.height =
+ (pdevice->rad_info.family < CHIP_RENOIR) ? ((pdevice->rad_info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
break;
default:
break;
VkVideoFormatPropertiesKHR *pVideoFormatProperties)
{
/* radv requires separate allocations for DPB and decode video. */
- if ((pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR |
- VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)) ==
+ if ((pVideoFormatInfo->imageUsage &
+ (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)) ==
(VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR))
return VK_ERROR_FORMAT_NOT_SUPPORTED;
- VK_OUTARRAY_MAKE_TYPED(VkVideoFormatPropertiesKHR, out,
- pVideoFormatProperties,
- pVideoFormatPropertyCount);
+ VK_OUTARRAY_MAKE_TYPED(VkVideoFormatPropertiesKHR, out, pVideoFormatProperties, pVideoFormatPropertyCount);
bool need_8bit = true;
bool need_10bit = false;
- const struct VkVideoProfileListInfoKHR *prof_list = (struct VkVideoProfileListInfoKHR *)
- vk_find_struct_const(pVideoFormatInfo->pNext, VIDEO_PROFILE_LIST_INFO_KHR);
+ const struct VkVideoProfileListInfoKHR *prof_list =
+ (struct VkVideoProfileListInfoKHR *)vk_find_struct_const(pVideoFormatInfo->pNext, VIDEO_PROFILE_LIST_INFO_KHR);
if (prof_list) {
for (unsigned i = 0; i < prof_list->profileCount; i++) {
const VkVideoProfileInfoKHR *profile = &prof_list->pProfiles[i];
}
if (need_10bit) {
- vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p) {
+ vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p)
+ {
p->format = VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16;
p->imageType = VK_IMAGE_TYPE_2D;
p->imageTiling = VK_IMAGE_TILING_OPTIMAL;
}
if (need_8bit) {
- vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p) {
- p->format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
- p->imageType = VK_IMAGE_TYPE_2D;
- p->imageTiling = VK_IMAGE_TILING_OPTIMAL;
- p->imageUsageFlags = pVideoFormatInfo->imageUsage;
- }
+ vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p)
+ {
+ p->format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
+ p->imageType = VK_IMAGE_TYPE_2D;
+ p->imageTiling = VK_IMAGE_TILING_OPTIMAL;
+ p->imageUsageFlags = pVideoFormatInfo->imageUsage;
+ }
}
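+   /* vk_outarray_append_typed implements the standard Vulkan two-call idiom:
+    * if the caller passed a NULL properties array it only advances the count,
+    * otherwise it fills in the next element, so the query and fill paths
+    * share the bodies above. */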
return vk_outarray_status(&out);
#define RADV_BIND_DECODER_CTX 1
VkResult
-radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device,
- VkVideoSessionKHR videoSession,
+radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR videoSession,
uint32_t *pMemoryRequirementsCount,
VkVideoSessionMemoryRequirementsKHR *pMemoryRequirements)
{
RADV_FROM_HANDLE(radv_video_session, vid, videoSession);
uint32_t memory_type_bits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
- VK_OUTARRAY_MAKE_TYPED(VkVideoSessionMemoryRequirementsKHR, out,
- pMemoryRequirements,
- pMemoryRequirementsCount);
+ VK_OUTARRAY_MAKE_TYPED(VkVideoSessionMemoryRequirementsKHR, out, pMemoryRequirements, pMemoryRequirementsCount);
/* 1 buffer for session context */
if (device->physical_device->rad_info.family >= CHIP_POLARIS10) {
- vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m) {
+ vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
+ {
m->memoryBindIndex = RADV_BIND_SESSION_CTX;
m->memoryRequirements.size = RDECODE_SESSION_CONTEXT_SIZE;
m->memoryRequirements.alignment = 0;
}
}
- if (vid->stream_type == RDECODE_CODEC_H264_PERF &&
- device->physical_device->rad_info.family >= CHIP_POLARIS10) {
- vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m) {
+ if (vid->stream_type == RDECODE_CODEC_H264_PERF && device->physical_device->rad_info.family >= CHIP_POLARIS10) {
+ vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
+ {
m->memoryBindIndex = RADV_BIND_DECODER_CTX;
m->memoryRequirements.size = align(calc_ctx_size_h264_perf(vid), 4096);
m->memoryRequirements.alignment = 0;
ctx_size = calc_ctx_size_h265_main10(vid);
else
ctx_size = calc_ctx_size_h265_main(vid);
- vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m) {
+ vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
+ {
m->memoryBindIndex = RADV_BIND_DECODER_CTX;
m->memoryRequirements.size = align(ctx_size, 4096);
m->memoryRequirements.alignment = 0;
}
VkResult
-radv_UpdateVideoSessionParametersKHR(VkDevice _device,
- VkVideoSessionParametersKHR videoSessionParameters,
+radv_UpdateVideoSessionParametersKHR(VkDevice _device, VkVideoSessionParametersKHR videoSessionParameters,
const VkVideoSessionParametersUpdateInfoKHR *pUpdateInfo)
{
RADV_FROM_HANDLE(radv_video_session_params, params, videoSessionParameters);
}
static void
-copy_bind(struct radv_vid_mem *dst,
- const VkBindVideoSessionMemoryInfoKHR *src)
+copy_bind(struct radv_vid_mem *dst, const VkBindVideoSessionMemoryInfoKHR *src)
{
dst->mem = radv_device_memory_from_handle(src->memory);
dst->offset = src->memoryOffset;
}
VkResult
-radv_BindVideoSessionMemoryKHR(VkDevice _device,
- VkVideoSessionKHR videoSession,
- uint32_t videoSessionBindMemoryCount,
+radv_BindVideoSessionMemoryKHR(VkDevice _device, VkVideoSessionKHR videoSession, uint32_t videoSessionBindMemoryCount,
const VkBindVideoSessionMemoryInfoKHR *pBindSessionMemoryInfos)
{
RADV_FROM_HANDLE(radv_video_session, vid, videoSession);
}
/* add a new set register command to the IB */
-static void set_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, uint32_t val)
+static void
+set_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, uint32_t val)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
radeon_emit(cs, RDECODE_PKT0(reg >> 2, 0));
radeon_emit(cs, val);
}
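+/* set_reg emits a two-dword type-0 packet: a PKT0 header addressing the
+ * register by dword index (reg >> 2), followed by the value to write. */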
-static void send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd,
- struct radeon_winsys_bo *bo, uint32_t offset)
+static void
+send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd, struct radeon_winsys_bo *bo, uint32_t offset)
{
struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
uint64_t addr;
set_reg(cmd_buffer, pdev->vid_dec_reg.cmd, cmd << 1);
return;
}
- switch(cmd) {
+ switch (cmd) {
case RDECODE_CMD_MSG_BUFFER:
cmd_buffer->video.decode_buffer->valid_buf_flag |= RDECODE_CMDBUF_FLAGS_MSG_BUFFER;
cmd_buffer->video.decode_buffer->msg_buffer_address_hi = (addr >> 32);
}
}
-static void rvcn_dec_message_create(struct radv_video_session *vid,
- void *ptr, uint32_t size)
+static void
+rvcn_dec_message_create(struct radv_video_session *vid, void *ptr, uint32_t size)
{
rvcn_dec_message_header_t *header = ptr;
rvcn_dec_message_create_t *create = (void *)((char *)ptr + sizeof(rvcn_dec_message_header_t));
create->height_in_samples = vid->vk.max_coded.height;
}
-static void rvcn_dec_message_feedback(void *ptr)
+static void
+rvcn_dec_message_feedback(void *ptr)
{
rvcn_dec_feedback_header_t *header = (void *)ptr;
header->num_buffers = 0;
}
-static const uint8_t h264_levels[] = { 10, 11, 12, 13, 20, 21, 22,
- 30, 31, 32, 40, 41, 42,
- 50, 51, 52, 60, 61, 62 };
-static uint8_t get_h264_level(StdVideoH264LevelIdc level)
+static const uint8_t h264_levels[] = {10, 11, 12, 13, 20, 21, 22, 30, 31, 32, 40, 41, 42, 50, 51, 52, 60, 61, 62};
+static uint8_t
+get_h264_level(StdVideoH264LevelIdc level)
{
- assert (level <= STD_VIDEO_H264_LEVEL_IDC_6_2);
+ assert(level <= STD_VIDEO_H264_LEVEL_IDC_6_2);
return h264_levels[level];
}
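+/* The level table above assumes StdVideoH264LevelIdc is a dense enum starting
+ * at 0 for level 1.0, which the assert guards; each entry is the level number
+ * with the decimal point dropped (e.g. level 5.1 -> 51). */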
-static rvcn_dec_message_avc_t get_h264_msg(struct radv_video_session *vid,
- struct radv_video_session_params *params,
- const struct VkVideoDecodeInfoKHR *frame_info,
- uint32_t *slice_offset,
- uint32_t *width_in_samples,
- uint32_t *height_in_samples,
- void *it_ptr)
+static rvcn_dec_message_avc_t
+get_h264_msg(struct radv_video_session *vid, struct radv_video_session_params *params,
+ const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *slice_offset, uint32_t *width_in_samples,
+ uint32_t *height_in_samples, void *it_ptr)
{
rvcn_dec_message_avc_t result;
const struct VkVideoDecodeH264PictureInfoKHR *h264_pic_info =
memset(&result, 0, sizeof(result));
assert(params->vk.h264_dec.std_sps_count > 0);
-   const StdVideoH264SequenceParameterSet *sps = vk_video_find_h264_dec_std_sps(&params->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id);
+   const StdVideoH264SequenceParameterSet *sps =
+      vk_video_find_h264_dec_std_sps(&params->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id);
switch (sps->profile_idc) {
case STD_VIDEO_H264_PROFILE_IDC_BASELINE:
result.profile = RDECODE_H264_PROFILE_BASELINE;
break;
default:
fprintf(stderr, "UNSUPPORTED CODEC %d\n", sps->profile_idc);
- result.profile= RDECODE_H264_PROFILE_MAIN;
+ result.profile = RDECODE_H264_PROFILE_MAIN;
break;
}
result.chroma_format = sps->chroma_format_idc;
-   const StdVideoH264PictureParameterSet *pps = vk_video_find_h264_dec_std_pps(&params->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id);
+   const StdVideoH264PictureParameterSet *pps =
+      vk_video_find_h264_dec_std_pps(&params->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id);
result.pps_info_flags = 0;
result.pps_info_flags |= pps->flags.transform_8x8_mode_flag << 0;
result.pps_info_flags |= pps->flags.redundant_pic_cnt_present_flag << 1;
memcpy(result.scaling_list_8x8[0], sps->pScalingLists->ScalingList8x8[0], 64);
memcpy(result.scaling_list_8x8[1], sps->pScalingLists->ScalingList8x8[3], 64);
} else {
- memset(result.scaling_list_4x4, 0x10, 6*16);
- memset(result.scaling_list_8x8, 0x10, 2*64);
+ memset(result.scaling_list_4x4, 0x10, 6 * 16);
+ memset(result.scaling_list_8x8, 0x10, 2 * 64);
}
memset(it_ptr, 0, IT_SCALING_TABLE_SIZE);
if (dpb_slot->pStdReferenceInfo->flags.bottom_field_flag)
result.used_for_reference_flags |= (1 << (2 * i + 1));
- if (!dpb_slot->pStdReferenceInfo->flags.top_field_flag &&
- !dpb_slot->pStdReferenceInfo->flags.bottom_field_flag)
+ if (!dpb_slot->pStdReferenceInfo->flags.top_field_flag && !dpb_slot->pStdReferenceInfo->flags.bottom_field_flag)
result.used_for_reference_flags |= (3 << (2 * i));
if (dpb_slot->pStdReferenceInfo->flags.used_for_long_term_reference)
- result.ref_frame_list[i] |= 0x80;
+ result.ref_frame_list[i] |= 0x80;
if (dpb_slot->pStdReferenceInfo->flags.is_non_existing)
result.non_existing_frame_flags |= 1 << i;
-
}
result.curr_pic_ref_frame_num = frame_info->referenceSlotCount;
result.decoded_pic_idx = frame_info->pSetupReferenceSlot->slotIndex;
return result;
}
-static void update_h265_scaling(void *it_ptr,
- const StdVideoH265ScalingLists *scaling_lists)
+static void
+update_h265_scaling(void *it_ptr, const StdVideoH265ScalingLists *scaling_lists)
{
- uint8_t ScalingList4x4[STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS][STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS] = { 0 };
- uint8_t ScalingList8x8[STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS][STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS] = { 0 };
- uint8_t ScalingList16x16[STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS][STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS] = { 0 };
- uint8_t ScalingList32x32[STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS][STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS] = { 0 };
+ uint8_t ScalingList4x4[STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS][STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS] = {
+ 0};
+ uint8_t ScalingList8x8[STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS][STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS] = {
+ 0};
+ uint8_t ScalingList16x16[STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS]
+ [STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS] = {0};
+ uint8_t ScalingList32x32[STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS]
+ [STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS] = {0};
int i, j;
if (scaling_lists) {
- for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS; i++) {
+ for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS; i++) {
for (j = 0; j < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS; j++)
ScalingList4x4[i][j] = scaling_lists->ScalingList4x4[i][vl_zscan_h265_up_right_diagonal_16[j]];
for (j = 0; j < STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS; j++) {
}
}
- memcpy(it_ptr, ScalingList4x4, STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS);
- memcpy((char *)it_ptr + 96, ScalingList8x8, STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS);
- memcpy((char *)it_ptr + 480, ScalingList16x16, STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS);
- memcpy((char *)it_ptr + 864, ScalingList32x32, STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS);
+ memcpy(it_ptr, ScalingList4x4,
+ STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS);
+ memcpy((char *)it_ptr + 96, ScalingList8x8,
+ STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS);
+ memcpy((char *)it_ptr + 480, ScalingList16x16,
+ STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS);
+ memcpy((char *)it_ptr + 864, ScalingList32x32,
+ STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS);
}
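+/* The copies above lay the IT scaling buffer out contiguously: 4x4 lists at
+ * offset 0 (6 * 16 bytes), 8x8 at 96 (6 * 64), 16x16 at 480 (6 * 64) and
+ * 32x32 at 864 (2 * 64), each list first reordered into up-right diagonal
+ * scan order. */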
-static rvcn_dec_message_hevc_t get_h265_msg(struct radv_device *device,
- struct radv_video_session *vid,
- struct radv_video_session_params *params,
- const struct VkVideoDecodeInfoKHR *frame_info,
- void *it_ptr)
+static rvcn_dec_message_hevc_t
+get_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params,
+ const struct VkVideoDecodeInfoKHR *frame_info, void *it_ptr)
{
rvcn_dec_message_hevc_t result;
int i, j;
vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H265_PICTURE_INFO_KHR);
memset(&result, 0, sizeof(result));
-   const StdVideoH265SequenceParameterSet *sps = vk_video_find_h265_dec_std_sps(&params->vk, h265_pic_info->pStdPictureInfo->sps_video_parameter_set_id);
-   const StdVideoH265PictureParameterSet *pps = vk_video_find_h265_dec_std_pps(&params->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id);
+   const StdVideoH265SequenceParameterSet *sps =
+      vk_video_find_h265_dec_std_sps(&params->vk, h265_pic_info->pStdPictureInfo->sps_video_parameter_set_id);
+   const StdVideoH265PictureParameterSet *pps =
+      vk_video_find_h265_dec_std_pps(&params->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id);
result.sps_info_flags = 0;
result.sps_info_flags |= sps->flags.scaling_list_enabled_flag << 0;
result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
- result.sps_max_dec_pic_buffering_minus1 = sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1];
- result.log2_min_luma_coding_block_size_minus3 =
- sps->log2_min_luma_coding_block_size_minus3;
- result.log2_diff_max_min_luma_coding_block_size =
- sps->log2_diff_max_min_luma_coding_block_size;
- result.log2_min_transform_block_size_minus2 =
- sps->log2_min_luma_transform_block_size_minus2;
- result.log2_diff_max_min_transform_block_size =
- sps->log2_diff_max_min_luma_transform_block_size;
+ result.sps_max_dec_pic_buffering_minus1 =
+ sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1];
+ result.log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3;
+ result.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size;
+ result.log2_min_transform_block_size_minus2 = sps->log2_min_luma_transform_block_size_minus2;
+ result.log2_diff_max_min_transform_block_size = sps->log2_diff_max_min_luma_transform_block_size;
result.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter;
result.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra;
if (sps->flags.pcm_enabled_flag) {
for (i = 0; i < 8; ++i)
result.ref_pic_set_lt_curr[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetLtCurr[i]);
-
const StdVideoH265ScalingLists *scaling_lists = NULL;
if (pps->flags.pps_scaling_list_data_present_flag)
scaling_lists = pps->pScalingLists;
for (i = 0; i < 2; i++) {
for (j = 0; j < 15; j++)
- result.direct_reflist[i][j] = 0xff;//pic->RefPicList[i][j];
+ result.direct_reflist[i][j] = 0xff; // pic->RefPicList[i][j];
}
if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10) {
return result;
}
-static bool rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer,
- struct radv_video_session *vid,
- struct radv_video_session_params *params,
- void *ptr,
- void *it_ptr,
- uint32_t *slice_offset,
- const struct VkVideoDecodeInfoKHR *frame_info)
+static bool
+rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_session *vid,
+ struct radv_video_session_params *params, void *ptr, void *it_ptr, uint32_t *slice_offset,
+ const struct VkVideoDecodeInfoKHR *frame_info)
{
struct radv_device *device = cmd_buffer->device;
rvcn_dec_message_header_t *header;
struct radv_image *img = dst_iv->image;
struct radv_image_plane *luma = &img->planes[0];
struct radv_image_plane *chroma = &img->planes[1];
- struct radv_image_view *dpb_iv = radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
+ struct radv_image_view *dpb_iv =
+ radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
struct radv_image *dpb = dpb_iv->image;
header = ptr;
}
offset_decode = sizes;
- decode = (void *)((char*)header + sizes);
+ decode = (void *)((char *)header + sizes);
sizes += sizeof(rvcn_dec_message_decode_t);
if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
offset_dynamic_dpb = sizes;
- dynamic_dpb_t2 = (void*)((char *)header + sizes);
+ dynamic_dpb_t2 = (void *)((char *)header + sizes);
sizes += sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
}
decode->dpb_size = (vid->dpb_type != DPB_DYNAMIC_TIER_2) ? dpb->size : 0;
- decode->dt_size = dst_iv->image->planes[0].surface.total_size +
- dst_iv->image->planes[1].surface.total_size;
+ decode->dt_size = dst_iv->image->planes[0].surface.total_size + dst_iv->image->planes[1].surface.total_size;
decode->sct_size = 0;
decode->sc_coeff_size = 0;
decode->dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset;
if (decode->dt_field_mode) {
- decode->dt_luma_bottom_offset =
- luma->surface.u.gfx9.surf_offset + luma->surface.u.gfx9.surf_slice_size;
- decode->dt_chroma_bottom_offset =
- chroma->surface.u.gfx9.surf_offset + chroma->surface.u.gfx9.surf_slice_size;
+ decode->dt_luma_bottom_offset = luma->surface.u.gfx9.surf_offset + luma->surface.u.gfx9.surf_slice_size;
+ decode->dt_chroma_bottom_offset = chroma->surface.u.gfx9.surf_offset + chroma->surface.u.gfx9.surf_slice_size;
} else {
decode->dt_luma_bottom_offset = decode->dt_luma_top_offset;
decode->dt_chroma_bottom_offset = decode->dt_chroma_top_offset;
*slice_offset = 0;
switch (vid->vk.op) {
case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
- rvcn_dec_message_avc_t avc = get_h264_msg(vid, params, frame_info, slice_offset, &decode->width_in_samples, &decode->height_in_samples, it_ptr);
+ rvcn_dec_message_avc_t avc = get_h264_msg(vid, params, frame_info, slice_offset, &decode->width_in_samples,
+ &decode->height_in_samples, it_ptr);
memcpy(codec, (void *)&avc, sizeof(rvcn_dec_message_avc_t));
index_codec->message_id = RDECODE_MESSAGE_AVC;
break;
uint64_t addr;
for (int i = 0; i < frame_info->referenceSlotCount; i++) {
- struct radv_image_view *f_dpb_iv = radv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
+ struct radv_image_view *f_dpb_iv =
+ radv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
struct radv_image *dpb_img = f_dpb_iv->image;
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo);
return true;
}
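+/* With the dynamic tier-2 DPB there is no single DPB allocation, so each
+ * reference image's BO is added to the command stream above to keep it
+ * resident for this submission. */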
-static struct ruvd_h264 get_uvd_h264_msg(struct radv_video_session *vid,
- struct radv_video_session_params *params,
- const struct VkVideoDecodeInfoKHR *frame_info,
- uint32_t *slice_offset,
- uint32_t *width_in_samples,
- uint32_t *height_in_samples,
- void *it_ptr)
+static struct ruvd_h264
+get_uvd_h264_msg(struct radv_video_session *vid, struct radv_video_session_params *params,
+ const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *slice_offset, uint32_t *width_in_samples,
+ uint32_t *height_in_samples, void *it_ptr)
{
struct ruvd_h264 result;
const struct VkVideoDecodeH264PictureInfoKHR *h264_pic_info =
memset(&result, 0, sizeof(result));
-   const StdVideoH264SequenceParameterSet *sps = vk_video_find_h264_dec_std_sps(&params->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id);
+   const StdVideoH264SequenceParameterSet *sps =
+      vk_video_find_h264_dec_std_sps(&params->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id);
switch (sps->profile_idc) {
case STD_VIDEO_H264_PROFILE_IDC_BASELINE:
result.profile = RUVD_H264_PROFILE_BASELINE;
result.chroma_format = sps->chroma_format_idc;
-   const StdVideoH264PictureParameterSet *pps = vk_video_find_h264_dec_std_pps(&params->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id);
+   const StdVideoH264PictureParameterSet *pps =
+      vk_video_find_h264_dec_std_pps(&params->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id);
result.pps_info_flags = 0;
result.pps_info_flags |= pps->flags.transform_8x8_mode_flag << 0;
result.pps_info_flags |= pps->flags.redundant_pic_cnt_present_flag << 1;
memcpy(result.scaling_list_8x8[0], sps->pScalingLists->ScalingList8x8[0], 64);
memcpy(result.scaling_list_8x8[1], sps->pScalingLists->ScalingList8x8[3], 64);
} else {
- memset(result.scaling_list_4x4, 0x10, 6*16);
- memset(result.scaling_list_8x8, 0x10, 2*64);
+ memset(result.scaling_list_4x4, 0x10, 6 * 16);
+ memset(result.scaling_list_8x8, 0x10, 2 * 64);
}
memset(it_ptr, 0, IT_SCALING_TABLE_SIZE);
result.ref_frame_list[i] = idx;
if (dpb_slot->pStdReferenceInfo->flags.used_for_long_term_reference)
- result.ref_frame_list[i] |= 0x80;
+ result.ref_frame_list[i] |= 0x80;
}
result.curr_pic_ref_frame_num = frame_info->referenceSlotCount;
result.decoded_pic_idx = frame_info->pSetupReferenceSlot->slotIndex;
return result;
}
-static struct ruvd_h265 get_uvd_h265_msg(struct radv_device *device,
- struct radv_video_session *vid,
- struct radv_video_session_params *params,
- const struct VkVideoDecodeInfoKHR *frame_info,
- void *it_ptr)
+static struct ruvd_h265
+get_uvd_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params,
+ const struct VkVideoDecodeInfoKHR *frame_info, void *it_ptr)
{
struct ruvd_h265 result;
int i, j;
memset(&result, 0, sizeof(result));
-   const StdVideoH265SequenceParameterSet *sps = vk_video_find_h265_dec_std_sps(&params->vk, h265_pic_info->pStdPictureInfo->sps_video_parameter_set_id);
-   const StdVideoH265PictureParameterSet *pps = vk_video_find_h265_dec_std_pps(&params->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id);
+   const StdVideoH265SequenceParameterSet *sps =
+      vk_video_find_h265_dec_std_sps(&params->vk, h265_pic_info->pStdPictureInfo->sps_video_parameter_set_id);
+   const StdVideoH265PictureParameterSet *pps =
+      vk_video_find_h265_dec_std_pps(&params->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id);
result.sps_info_flags = 0;
result.sps_info_flags |= sps->flags.scaling_list_enabled_flag << 0;
result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
- result.sps_max_dec_pic_buffering_minus1 = sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1];
- result.log2_min_luma_coding_block_size_minus3 =
- sps->log2_min_luma_coding_block_size_minus3;
- result.log2_diff_max_min_luma_coding_block_size =
- sps->log2_diff_max_min_luma_coding_block_size;
- result.log2_min_transform_block_size_minus2 =
- sps->log2_min_luma_transform_block_size_minus2;
- result.log2_diff_max_min_transform_block_size =
- sps->log2_diff_max_min_luma_transform_block_size;
+ result.sps_max_dec_pic_buffering_minus1 =
+ sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1];
+ result.log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3;
+ result.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size;
+ result.log2_min_transform_block_size_minus2 = sps->log2_min_luma_transform_block_size_minus2;
+ result.log2_diff_max_min_transform_block_size = sps->log2_diff_max_min_luma_transform_block_size;
result.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter;
result.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra;
if (sps->flags.pcm_enabled_flag) {
result.pcm_sample_bit_depth_luma_minus1 = sps->pcm_sample_bit_depth_luma_minus1;
result.pcm_sample_bit_depth_chroma_minus1 = sps->pcm_sample_bit_depth_chroma_minus1;
- result.log2_min_pcm_luma_coding_block_size_minus3 =
- sps->log2_min_pcm_luma_coding_block_size_minus3;
- result.log2_diff_max_min_pcm_luma_coding_block_size =
- sps->log2_diff_max_min_pcm_luma_coding_block_size;
+ result.log2_min_pcm_luma_coding_block_size_minus3 = sps->log2_min_pcm_luma_coding_block_size_minus3;
+ result.log2_diff_max_min_pcm_luma_coding_block_size = sps->log2_diff_max_min_pcm_luma_coding_block_size;
}
result.num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets;
for (i = 0; i < 2; i++) {
for (j = 0; j < 15; j++)
- result.direct_reflist[i][j] = 0xff;//pic->RefPicList[i][j];
+ result.direct_reflist[i][j] = 0xff; // pic->RefPicList[i][j];
}
if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10) {
return result;
}
-static unsigned texture_offset_legacy(struct radeon_surf *surface, unsigned layer)
+static unsigned
+texture_offset_legacy(struct radeon_surf *surface, unsigned layer)
{
return (uint64_t)surface->u.legacy.level[0].offset_256B * 256 +
- layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4;
+ layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4;
}
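+/* Legacy (pre-GFX9) surfaces store the base level offset in 256-byte units
+ * and the per-layer slice size in dwords, hence the * 256 and * 4 above. */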
-static bool ruvd_dec_message_decode(struct radv_device *device,
- struct radv_video_session *vid,
- struct radv_video_session_params *params,
- void *ptr,
- void *it_ptr,
- uint32_t *slice_offset,
- const struct VkVideoDecodeInfoKHR *frame_info)
+static bool
+ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *vid,
+ struct radv_video_session_params *params, void *ptr, void *it_ptr, uint32_t *slice_offset,
+ const struct VkVideoDecodeInfoKHR *frame_info)
{
struct ruvd_msg *msg = ptr;
struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
struct radv_image *img = dst_iv->image;
struct radv_image_plane *luma = &img->planes[0];
struct radv_image_plane *chroma = &img->planes[1];
- struct radv_image_view *dpb_iv = radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
+ struct radv_image_view *dpb_iv =
+ radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
struct radv_image *dpb = dpb_iv->image;
memset(msg, 0, sizeof(struct ruvd_msg));
msg->body.decode.bsd_size = frame_info->srcBufferRange;
msg->body.decode.db_pitch = align(frame_info->dstPictureResource.codedExtent.width, vid->db_alignment);
- if (vid->stream_type == RUVD_CODEC_H264_PERF &&
- device->physical_device->rad_info.family >= CHIP_POLARIS10)
+ if (vid->stream_type == RUVD_CODEC_H264_PERF && device->physical_device->rad_info.family >= CHIP_POLARIS10)
msg->body.decode.dpb_reserved = vid->ctx.size;
*slice_offset = 0;
switch (vid->vk.op) {
case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
- msg->body.decode.codec.h264 = get_uvd_h264_msg(vid, params, frame_info,
- slice_offset,
- &msg->body.decode.width_in_samples,
- &msg->body.decode.height_in_samples,
- it_ptr);
+ msg->body.decode.codec.h264 =
+ get_uvd_h264_msg(vid, params, frame_info, slice_offset, &msg->body.decode.width_in_samples,
+ &msg->body.decode.height_in_samples, it_ptr);
break;
}
case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: {
return true;
}
-static void ruvd_dec_message_create(struct radv_video_session *vid,
- void *ptr)
+static void
+ruvd_dec_message_create(struct radv_video_session *vid, void *ptr)
{
struct ruvd_msg *msg = ptr;
}
void
-radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer,
- const VkVideoBeginCodingInfoKHR *pBeginInfo)
+radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoBeginCodingInfoKHR *pBeginInfo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_video_session, vid, pBeginInfo->videoSession);
if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED) {
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 256);
radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, false);
- rvcn_decode_ib_package_t *ib_header =
- (rvcn_decode_ib_package_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
- ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) +
- sizeof(struct rvcn_decode_ib_package_s);
+ rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
+ ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) + sizeof(struct rvcn_decode_ib_package_s);
cmd_buffer->cs->cdw++;
ib_header->package_type = (RDECODE_IB_PARAM_DECODE_BUFFER);
cmd_buffer->cs->cdw++;
- cmd_buffer->video.decode_buffer =
- (rvcn_decode_buffer_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
+ cmd_buffer->video.decode_buffer = (rvcn_decode_buffer_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
cmd_buffer->cs->cdw += sizeof(struct rvcn_decode_buffer_s) / 4;
memset(cmd_buffer->video.decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s));
}
-
}
static void
void *ptr;
uint32_t out_offset;
- radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset,
- &ptr);
+ radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
rvcn_dec_message_create(vid, ptr, size);
send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
uint32_t size = sizeof(struct ruvd_msg);
void *ptr;
uint32_t out_offset;
- radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset,
- &ptr);
+ radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
ruvd_dec_message_create(vid, ptr);
if (vid->sessionctx.mem)
}
void
-radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer,
- const VkVideoCodingControlInfoKHR *pCodingControlInfo)
+radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoCodingControlInfoKHR *pCodingControlInfo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
if (pCodingControlInfo->flags & VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR) {
}
void
-radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer,
- const VkVideoEndCodingInfoKHR *pEndCodingInfo)
+radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoEndCodingInfoKHR *pEndCodingInfo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
}
static void
-radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer,
- const VkVideoDecodeInfoKHR *frame_info)
+radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info)
{
RADV_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer);
struct radv_video_session *vid = cmd_buffer->video.vid;
void *ptr, *fb_ptr, *it_ptr = NULL;
uint32_t out_offset, fb_offset, it_offset = 0;
struct radeon_winsys_bo *msg_bo, *fb_bo, *it_bo = NULL;
- unsigned fb_size = (cmd_buffer->device->physical_device->rad_info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE;
+ unsigned fb_size =
+ (cmd_buffer->device->physical_device->rad_info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE;
- radv_vid_buffer_upload_alloc(cmd_buffer, fb_size, &fb_offset,
- &fb_ptr);
+ radv_vid_buffer_upload_alloc(cmd_buffer, fb_size, &fb_offset, &fb_ptr);
fb_bo = cmd_buffer->upload.upload_bo;
if (have_it(vid)) {
- radv_vid_buffer_upload_alloc(cmd_buffer, IT_SCALING_TABLE_SIZE, &it_offset,
- &it_ptr);
+ radv_vid_buffer_upload_alloc(cmd_buffer, IT_SCALING_TABLE_SIZE, &it_offset, &it_ptr);
it_bo = cmd_buffer->upload.upload_bo;
}
- radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset,
- &ptr);
+ radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
msg_bo = cmd_buffer->upload.upload_bo;
uint32_t slice_offset;
send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, msg_bo, out_offset);
if (vid->dpb_type != DPB_DYNAMIC_TIER_2) {
- struct radv_image_view *dpb_iv = radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
+ struct radv_image_view *dpb_iv =
+ radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
struct radv_image *dpb = dpb_iv->image;
send_cmd(cmd_buffer, RDECODE_CMD_DPB_BUFFER, dpb->bindings[0].bo, dpb->bindings[0].offset);
}
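+   /* The block above sends a single RDECODE_CMD_DPB_BUFFER because a
+    * non-tier-2 DPB is one allocation holding every reference; the dynamic
+    * tier-2 path instead carries per-frame reference addresses in the decode
+    * message. */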
if (vid->ctx.mem)
send_cmd(cmd_buffer, RDECODE_CMD_CONTEXT_BUFFER, vid->ctx.mem->bo, vid->ctx.offset);
- send_cmd(cmd_buffer, RDECODE_CMD_BITSTREAM_BUFFER, src_buffer->bo, src_buffer->offset + frame_info->srcBufferOffset + slice_offset);
+ send_cmd(cmd_buffer, RDECODE_CMD_BITSTREAM_BUFFER, src_buffer->bo,
+ src_buffer->offset + frame_info->srcBufferOffset + slice_offset);
struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
struct radv_image *img = dst_iv->image;
}
static void
-radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer,
- const VkVideoDecodeInfoKHR *frame_info)
+radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info)
{
RADV_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer);
struct radv_video_session *vid = cmd_buffer->video.vid;
size += sizeof(rvcn_dec_message_index_t);
size += sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
}
- size += sizeof(rvcn_dec_message_decode_t); /* decode */
+ size += sizeof(rvcn_dec_message_decode_t); /* decode */
switch (vid->vk.op) {
case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
size += sizeof(rvcn_dec_message_avc_t);
unreachable("unsupported codec.");
}
- radv_vid_buffer_upload_alloc(cmd_buffer, FB_BUFFER_SIZE, &fb_offset,
- &fb_ptr);
+ radv_vid_buffer_upload_alloc(cmd_buffer, FB_BUFFER_SIZE, &fb_offset, &fb_ptr);
fb_bo = cmd_buffer->upload.upload_bo;
if (have_it(vid)) {
- radv_vid_buffer_upload_alloc(cmd_buffer, IT_SCALING_TABLE_SIZE, &it_offset,
- &it_ptr);
+ radv_vid_buffer_upload_alloc(cmd_buffer, IT_SCALING_TABLE_SIZE, &it_offset, &it_ptr);
it_bo = cmd_buffer->upload.upload_bo;
}
- radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset,
- &ptr);
+ radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
msg_bo = cmd_buffer->upload.upload_bo;
uint32_t slice_offset;
send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, msg_bo, out_offset);
if (vid->dpb_type != DPB_DYNAMIC_TIER_2) {
- struct radv_image_view *dpb_iv = radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
+ struct radv_image_view *dpb_iv =
+ radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
struct radv_image *dpb = dpb_iv->image;
send_cmd(cmd_buffer, RDECODE_CMD_DPB_BUFFER, dpb->bindings[0].bo, dpb->bindings[0].offset);
}
if (vid->ctx.mem)
send_cmd(cmd_buffer, RDECODE_CMD_CONTEXT_BUFFER, vid->ctx.mem->bo, vid->ctx.offset);
- send_cmd(cmd_buffer, RDECODE_CMD_BITSTREAM_BUFFER, src_buffer->bo, src_buffer->offset + frame_info->srcBufferOffset + slice_offset);
+ send_cmd(cmd_buffer, RDECODE_CMD_BITSTREAM_BUFFER, src_buffer->bo,
+ src_buffer->offset + frame_info->srcBufferOffset + slice_offset);
struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
struct radv_image *img = dst_iv->image;
}
void
-radv_CmdDecodeVideoKHR(VkCommandBuffer commandBuffer,
- const VkVideoDecodeInfoKHR *frame_info)
+radv_CmdDecodeVideoKHR(VkCommandBuffer commandBuffer, const VkVideoDecodeInfoKHR *frame_info)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
* IN THE SOFTWARE.
*/
+#include "meta/radv_meta.h"
#include "util/macros.h"
#include "radv_debug.h"
-#include "meta/radv_meta.h"
#include "radv_private.h"
#include "vk_fence.h"
#include "vk_semaphore.h"
.queueCount = 1,
};
- device->private_sdma_queue = vk_zalloc(&device->vk.alloc, sizeof(struct radv_queue), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ device->private_sdma_queue =
+ vk_zalloc(&device->vk.alloc, sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
VkResult result = radv_queue_init(device, device->private_sdma_queue, 0, &queue_create, NULL);
if (result == VK_SUCCESS) {
radv_init_wsi(struct radv_physical_device *physical_device)
{
VkResult result =
- wsi_device_init(&physical_device->wsi_device, radv_physical_device_to_handle(physical_device),
- radv_wsi_proc_addr, &physical_device->instance->vk.alloc,
- physical_device->master_fd, &physical_device->instance->dri_options, &(struct wsi_device_options){.sw_device = false});
+ wsi_device_init(&physical_device->wsi_device, radv_physical_device_to_handle(physical_device), radv_wsi_proc_addr,
+ &physical_device->instance->vk.alloc, physical_device->master_fd,
+ &physical_device->instance->dri_options, &(struct wsi_device_options){.sw_device = false});
if (result != VK_SUCCESS)
return result;
#include "sid.h"
static void
-si_write_harvested_raster_configs(struct radv_physical_device *physical_device,
- struct radeon_cmdbuf *cs, unsigned raster_config,
- unsigned raster_config_1)
+si_write_harvested_raster_configs(struct radv_physical_device *physical_device, struct radeon_cmdbuf *cs,
+ unsigned raster_config, unsigned raster_config_1)
{
unsigned num_se = MAX2(physical_device->rad_info.max_se, 1);
unsigned raster_config_se[4];
unsigned se;
- ac_get_harvested_configs(&physical_device->rad_info, raster_config, &raster_config_1,
- raster_config_se);
+ ac_get_harvested_configs(&physical_device->rad_info, raster_config, &raster_config_1, raster_config_se);
for (se = 0; se < num_se; se++) {
/* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */
if (physical_device->rad_info.gfx_level < GFX7)
- radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX,
- S_00802C_SE_INDEX(se) | S_00802C_SH_BROADCAST_WRITES(1) |
- S_00802C_INSTANCE_BROADCAST_WRITES(1));
+ radeon_set_config_reg(
+ cs, R_00802C_GRBM_GFX_INDEX,
+ S_00802C_SE_INDEX(se) | S_00802C_SH_BROADCAST_WRITES(1) | S_00802C_INSTANCE_BROADCAST_WRITES(1));
else
- radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
- S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
+ radeon_set_uconfig_reg(
+ cs, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1));
radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]);
}
/* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */
if (physical_device->rad_info.gfx_level < GFX7)
- radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX,
- S_00802C_SE_BROADCAST_WRITES(1) | S_00802C_SH_BROADCAST_WRITES(1) |
- S_00802C_INSTANCE_BROADCAST_WRITES(1));
+ radeon_set_config_reg(
+ cs, R_00802C_GRBM_GFX_INDEX,
+ S_00802C_SE_BROADCAST_WRITES(1) | S_00802C_SH_BROADCAST_WRITES(1) | S_00802C_INSTANCE_BROADCAST_WRITES(1));
else
- radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
- S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
+ radeon_set_uconfig_reg(
+ cs, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1));
if (physical_device->rad_info.gfx_level >= GFX7)
radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
radeon_emit(cs, 0);
radeon_emit(cs, 0);
- radeon_set_sh_reg(cs, R_00B834_COMPUTE_PGM_HI,
- S_00B834_DATA(device->physical_device->rad_info.address32_hi >> 8));
+ radeon_set_sh_reg(cs, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(device->physical_device->rad_info.address32_hi >> 8));
radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2);
/* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1,
}
}
- if (device->physical_device->rad_info.gfx_level >= GFX9 &&
- device->physical_device->rad_info.gfx_level < GFX11) {
+ if (device->physical_device->rad_info.gfx_level >= GFX9 && device->physical_device->rad_info.gfx_level < GFX11) {
radeon_set_uconfig_reg(cs, R_0301EC_CP_COHER_START_DELAY,
device->physical_device->rad_info.gfx_level >= GFX10 ? 0x20 : 0);
}
if (!has_clear_state && physical_device->rad_info.gfx_level < GFX11)
radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0);
if (physical_device->rad_info.gfx_level < GFX7)
- radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE,
- S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1));
+ radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1));
if (!has_clear_state)
radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
*/
if (physical_device->rad_info.gfx_level <= GFX7 || !has_clear_state) {
radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
- radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL,
- S_028204_WINDOW_OFFSET_DISABLE(1));
- radeon_set_context_reg(cs, R_028240_PA_SC_GENERIC_SCISSOR_TL,
- S_028240_WINDOW_OFFSET_DISABLE(1));
- radeon_set_context_reg(
- cs, R_028244_PA_SC_GENERIC_SCISSOR_BR,
- S_028244_BR_X(MAX_FRAMEBUFFER_WIDTH) | S_028244_BR_Y(MAX_FRAMEBUFFER_HEIGHT));
+ radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
+ radeon_set_context_reg(cs, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
+ radeon_set_context_reg(cs, R_028244_PA_SC_GENERIC_SCISSOR_BR,
+ S_028244_BR_X(MAX_FRAMEBUFFER_WIDTH) | S_028244_BR_Y(MAX_FRAMEBUFFER_HEIGHT));
radeon_set_context_reg(cs, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
- radeon_set_context_reg(
- cs, R_028034_PA_SC_SCREEN_SCISSOR_BR,
- S_028034_BR_X(MAX_FRAMEBUFFER_WIDTH) | S_028034_BR_Y(MAX_FRAMEBUFFER_HEIGHT));
+ radeon_set_context_reg(cs, R_028034_PA_SC_SCREEN_SCISSOR_BR,
+ S_028034_BR_X(MAX_FRAMEBUFFER_WIDTH) | S_028034_BR_Y(MAX_FRAMEBUFFER_HEIGHT));
}
if (!has_clear_state) {
radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
}
- radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE,
- S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
- S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));
+ radeon_set_context_reg(
+ cs, R_02800C_DB_RENDER_OVERRIDE,
+ S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));
if (physical_device->rad_info.gfx_level >= GFX10) {
radeon_set_context_reg(cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL, 0);
radeon_set_uconfig_reg(cs, R_030988_GE_USER_VGPR_EN, 0);
if (physical_device->rad_info.gfx_level < GFX11) {
- radeon_set_context_reg(
- cs, R_028038_DB_DFSM_CONTROL,
- S_028038_PUNCHOUT_MODE(V_028038_FORCE_OFF) | S_028038_POPS_DRAIN_PS_ON_OVERLAP(1));
+ radeon_set_context_reg(cs, R_028038_DB_DFSM_CONTROL,
+ S_028038_PUNCHOUT_MODE(V_028038_FORCE_OFF) | S_028038_POPS_DRAIN_PS_ON_OVERLAP(1));
}
} else if (physical_device->rad_info.gfx_level == GFX9) {
radeon_set_uconfig_reg(cs, R_030920_VGT_MAX_VTX_INDX, ~0);
radeon_set_uconfig_reg(cs, R_030928_VGT_INDX_OFFSET, 0);
radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL,
- S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) |
- S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
+ S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) | S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
} else {
/* These registers, when written, also overwrite the
* CLEAR_STATE context, so we can't rely on CLEAR_STATE setting
cu_mask_ps = u_bit_consecutive(0, physical_device->rad_info.min_good_cu_per_sa);
if (physical_device->rad_info.gfx_level >= GFX7) {
- if (physical_device->rad_info.gfx_level >= GFX10 &&
- physical_device->rad_info.gfx_level < GFX11) {
+ if (physical_device->rad_info.gfx_level >= GFX10 && physical_device->rad_info.gfx_level < GFX11) {
/* Logical CUs 16 - 31 */
radeon_set_sh_reg_idx(physical_device, cs, R_00B104_SPI_SHADER_PGM_RSRC4_VS, 3,
- ac_apply_cu_en(S_00B104_CU_EN(0xffff),
- C_00B104_CU_EN, 16, &physical_device->rad_info));
+ ac_apply_cu_en(S_00B104_CU_EN(0xffff), C_00B104_CU_EN, 16, &physical_device->rad_info));
}
if (physical_device->rad_info.gfx_level >= GFX10) {
radeon_set_sh_reg_idx(physical_device, cs, R_00B404_SPI_SHADER_PGM_RSRC4_HS, 3,
- ac_apply_cu_en(S_00B404_CU_EN(0xffff),
- C_00B404_CU_EN, 16, &physical_device->rad_info));
- radeon_set_sh_reg_idx(physical_device, cs, R_00B004_SPI_SHADER_PGM_RSRC4_PS, 3,
- ac_apply_cu_en(S_00B004_CU_EN(cu_mask_ps >> 16),
- C_00B004_CU_EN, 16, &physical_device->rad_info));
+ ac_apply_cu_en(S_00B404_CU_EN(0xffff), C_00B404_CU_EN, 16, &physical_device->rad_info));
+ radeon_set_sh_reg_idx(
+ physical_device, cs, R_00B004_SPI_SHADER_PGM_RSRC4_PS, 3,
+ ac_apply_cu_en(S_00B004_CU_EN(cu_mask_ps >> 16), C_00B004_CU_EN, 16, &physical_device->rad_info));
}
if (physical_device->rad_info.gfx_level >= GFX9) {
radeon_set_sh_reg_idx(physical_device, cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 3,
- ac_apply_cu_en(S_00B41C_CU_EN(0xffff) |
- S_00B41C_WAVE_LIMIT(0x3F),
- C_00B41C_CU_EN, 0, &physical_device->rad_info));
+ ac_apply_cu_en(S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F), C_00B41C_CU_EN, 0,
+ &physical_device->rad_info));
} else {
radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
- ac_apply_cu_en(S_00B51C_CU_EN(0xffff) |
- S_00B51C_WAVE_LIMIT(0x3F),
- C_00B51C_CU_EN, 0, &physical_device->rad_info));
+ ac_apply_cu_en(S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F), C_00B51C_CU_EN, 0,
+ &physical_device->rad_info));
radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F));
radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
- ac_apply_cu_en(S_00B31C_CU_EN(0xffff) |
- S_00B31C_WAVE_LIMIT(0x3F),
- C_00B31C_CU_EN, 0, &physical_device->rad_info));
+ ac_apply_cu_en(S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F), C_00B31C_CU_EN, 0,
+ &physical_device->rad_info));
/* If this is 0, Bonaire can hang even if GS isn't being used.
* Other chips are unaffected. These are suboptimal values,
* but we don't use on-chip GS.
}
radeon_set_sh_reg_idx(physical_device, cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 3,
- ac_apply_cu_en(S_00B01C_CU_EN(cu_mask_ps) |
- S_00B01C_WAVE_LIMIT(0x3F) |
- S_00B01C_LDS_GROUP_SIZE(physical_device->rad_info.gfx_level >= GFX11),
+ ac_apply_cu_en(S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F) |
+ S_00B01C_LDS_GROUP_SIZE(physical_device->rad_info.gfx_level >= GFX11),
C_00B01C_CU_EN, 0, &physical_device->rad_info));
}
* a single primitive shader subgroup.
*/
uint32_t max_deallocs_in_wave = physical_device->rad_info.gfx_level >= GFX11 ? 16 : 512;
- radeon_set_context_reg(cs, R_028C50_PA_SC_NGG_MODE_CNTL,
- S_028C50_MAX_DEALLOCS_IN_WAVE(max_deallocs_in_wave));
+ radeon_set_context_reg(cs, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(max_deallocs_in_wave));
if (physical_device->rad_info.gfx_level < GFX11)
radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
*/
unsigned vertex_reuse_depth = physical_device->rad_info.gfx_level >= GFX10_3 ? 30 : 0;
radeon_set_context_reg(cs, R_028838_PA_CL_NGG_CNTL,
- S_028838_INDEX_BUF_EDGE_FLAG_ENA(0) |
- S_028838_VERTEX_REUSE_DEPTH(vertex_reuse_depth));
+ S_028838_INDEX_BUF_EDGE_FLAG_ENA(0) | S_028838_VERTEX_REUSE_DEPTH(vertex_reuse_depth));
/* Enable CMASK/FMASK/HTILE/DCC caching in L2 for small chips. */
unsigned meta_write_policy, meta_read_policy;
- unsigned no_alloc = device->physical_device->rad_info.gfx_level >= GFX11
- ? V_02807C_CACHE_NOA_GFX11
- : V_02807C_CACHE_NOA_GFX10;
+ unsigned no_alloc =
+ device->physical_device->rad_info.gfx_level >= GFX11 ? V_02807C_CACHE_NOA_GFX11 : V_02807C_CACHE_NOA_GFX10;
/* TODO: investigate whether LRU improves performance on other chips too */
if (physical_device->rad_info.max_render_backends <= 4) {
meta_read_policy = no_alloc; /* don't cache reads */
}
- radeon_set_context_reg(
- cs, R_02807C_DB_RMI_L2_CACHE_CONTROL,
- S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM) | S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM) |
- S_02807C_HTILE_WR_POLICY(meta_write_policy) |
- S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM) | S_02807C_Z_RD_POLICY(no_alloc) |
- S_02807C_S_RD_POLICY(no_alloc) | S_02807C_HTILE_RD_POLICY(meta_read_policy));
+ radeon_set_context_reg(cs, R_02807C_DB_RMI_L2_CACHE_CONTROL,
+ S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM) | S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM) |
+ S_02807C_HTILE_WR_POLICY(meta_write_policy) |
+ S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM) | S_02807C_Z_RD_POLICY(no_alloc) |
+ S_02807C_S_RD_POLICY(no_alloc) | S_02807C_HTILE_RD_POLICY(meta_read_policy));
uint32_t gl2_cc;
if (device->physical_device->rad_info.gfx_level >= GFX11) {
S_028410_COLOR_WR_POLICY_GFX11(V_028410_CACHE_STREAM) |
S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_GFX11);
} else {
- gl2_cc = S_028410_CMASK_WR_POLICY(meta_write_policy) |
- S_028410_FMASK_WR_POLICY(V_028410_CACHE_STREAM) |
+ gl2_cc = S_028410_CMASK_WR_POLICY(meta_write_policy) | S_028410_FMASK_WR_POLICY(V_028410_CACHE_STREAM) |
S_028410_DCC_WR_POLICY_GFX10(meta_write_policy) |
- S_028410_COLOR_WR_POLICY_GFX10(V_028410_CACHE_STREAM) |
- S_028410_CMASK_RD_POLICY(meta_read_policy) |
+ S_028410_COLOR_WR_POLICY_GFX10(V_028410_CACHE_STREAM) | S_028410_CMASK_RD_POLICY(meta_read_policy) |
S_028410_FMASK_RD_POLICY(V_028410_CACHE_NOA_GFX10) |
S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_GFX10);
}
- radeon_set_context_reg(cs, R_028410_CB_RMI_GL2_CACHE_CONTROL,
- gl2_cc | S_028410_DCC_RD_POLICY(meta_read_policy));
+ radeon_set_context_reg(cs, R_028410_CB_RMI_GL2_CACHE_CONTROL, gl2_cc | S_028410_DCC_RD_POLICY(meta_read_policy));
radeon_set_context_reg(cs, R_028428_CB_COVERAGE_OUT_CONTROL, 0);
radeon_set_sh_reg_seq(cs, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 4);
if (physical_device->rad_info.gfx_level >= GFX10_3) {
radeon_set_context_reg(cs, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff);
/* This allows sample shading. */
- radeon_set_context_reg(
- cs, R_028848_PA_CL_VRS_CNTL,
- S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE));
+ radeon_set_context_reg(cs, R_028848_PA_CL_VRS_CNTL,
+ S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE));
}
}
if (physical_device->rad_info.gfx_level >= GFX11) {
/* ACCUM fields changed their meaning. */
radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION,
- S_028B50_ACCUM_ISOLINE(128) | S_028B50_ACCUM_TRI(128) |
- S_028B50_ACCUM_QUAD(128) | S_028B50_DONUT_SPLIT_GFX9(24) |
- S_028B50_TRAP_SPLIT(6));
+ S_028B50_ACCUM_ISOLINE(128) | S_028B50_ACCUM_TRI(128) | S_028B50_ACCUM_QUAD(128) |
+ S_028B50_DONUT_SPLIT_GFX9(24) | S_028B50_TRAP_SPLIT(6));
} else if (physical_device->rad_info.gfx_level >= GFX9) {
radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION,
- S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) |
- S_028B50_ACCUM_QUAD(24) | S_028B50_DONUT_SPLIT_GFX9(24) |
- S_028B50_TRAP_SPLIT(6));
+ S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) | S_028B50_ACCUM_QUAD(24) |
+ S_028B50_DONUT_SPLIT_GFX9(24) | S_028B50_TRAP_SPLIT(6));
} else if (physical_device->rad_info.gfx_level >= GFX8) {
uint32_t vgt_tess_distribution;
- vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | S_028B50_ACCUM_TRI(11) |
- S_028B50_ACCUM_QUAD(11) | S_028B50_DONUT_SPLIT_GFX81(16);
+ vgt_tess_distribution =
+ S_028B50_ACCUM_ISOLINE(32) | S_028B50_ACCUM_TRI(11) | S_028B50_ACCUM_QUAD(11) | S_028B50_DONUT_SPLIT_GFX81(16);
- if (physical_device->rad_info.family == CHIP_FIJI ||
- physical_device->rad_info.family >= CHIP_POLARIS10)
+ if (physical_device->rad_info.family == CHIP_FIJI || physical_device->rad_info.family >= CHIP_POLARIS10)
vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
radeon_set_context_reg(cs, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
if (physical_device->rad_info.gfx_level >= GFX7) {
- radeon_set_context_reg(cs, R_028084_TA_BC_BASE_ADDR_HI,
- S_028084_ADDRESS(border_color_va >> 40));
+ radeon_set_context_reg(cs, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(border_color_va >> 40));
}
}
}
if (physical_device->rad_info.gfx_level >= GFX9) {
- radeon_set_context_reg(
- cs, R_028C48_PA_SC_BINNER_CNTL_1,
- S_028C48_MAX_ALLOC_COUNT(physical_device->rad_info.pbb_max_alloc_count - 1) |
- S_028C48_MAX_PRIM_PER_BATCH(1023));
- radeon_set_context_reg(cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
- S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));
+ radeon_set_context_reg(cs, R_028C48_PA_SC_BINNER_CNTL_1,
+ S_028C48_MAX_ALLOC_COUNT(physical_device->rad_info.pbb_max_alloc_count - 1) |
+ S_028C48_MAX_PRIM_PER_BATCH(1023));
+ radeon_set_context_reg(cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));
radeon_set_uconfig_reg(cs, R_030968_VGT_INSTANCE_BASE_ID, 0);
}
unsigned tmp = (unsigned)(1.0 * 8.0);
- radeon_set_context_reg(cs, R_028A00_PA_SU_POINT_SIZE,
- S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
- radeon_set_context_reg(cs, R_028A04_PA_SU_POINT_MINMAX,
- S_028A04_MIN_SIZE(radv_pack_float_12p4(0)) |
- S_028A04_MAX_SIZE(radv_pack_float_12p4(8191.875 / 2)));
+ radeon_set_context_reg(cs, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
+ radeon_set_context_reg(
+ cs, R_028A04_PA_SU_POINT_MINMAX,
+ S_028A04_MIN_SIZE(radv_pack_float_12p4(0)) | S_028A04_MAX_SIZE(radv_pack_float_12p4(8191.875 / 2)));
if (!has_clear_state) {
radeon_set_context_reg(cs, R_028004_DB_COUNT_CONTROL, S_028004_ZPASS_INCREMENT_DISABLE(1));
radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, small_prim_filter_cntl);
}
- radeon_set_context_reg(
- cs, R_0286D4_SPI_INTERP_CONTROL_0,
- S_0286D4_FLAT_SHADE_ENA(1) | S_0286D4_PNT_SPRITE_ENA(1) |
- S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
- S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
- S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
- S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
- S_0286D4_PNT_SPRITE_TOP_1(0)); /* vulkan is top to bottom - 1.0 at bottom */
+ radeon_set_context_reg(cs, R_0286D4_SPI_INTERP_CONTROL_0,
+ S_0286D4_FLAT_SHADE_ENA(1) | S_0286D4_PNT_SPRITE_ENA(1) |
+ S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
+ S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
+ S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
+ S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
+ S_0286D4_PNT_SPRITE_TOP_1(0)); /* vulkan is top to bottom - 1.0 at bottom */
radeon_set_context_reg(cs, R_028BE4_PA_SU_VTX_CNTL,
S_028BE4_PIX_CENTER(1) | S_028BE4_ROUND_MODE(V_028BE4_X_ROUND_TO_EVEN) |
S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
radeon_set_context_reg(cs, R_028818_PA_CL_VTE_CNTL,
- S_028818_VTX_W0_FMT(1) | S_028818_VPORT_X_SCALE_ENA(1) |
- S_028818_VPORT_X_OFFSET_ENA(1) | S_028818_VPORT_Y_SCALE_ENA(1) |
- S_028818_VPORT_Y_OFFSET_ENA(1) | S_028818_VPORT_Z_SCALE_ENA(1) |
- S_028818_VPORT_Z_OFFSET_ENA(1));
+ S_028818_VTX_W0_FMT(1) | S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) |
+ S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) |
+ S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
if (device->tma_bo) {
uint64_t tba_va, tma_va;
tba_va = radv_shader_get_va(device->trap_handler_shader);
tma_va = radv_buffer_get_va(device->tma_bo);
- uint32_t regs[] = {R_00B000_SPI_SHADER_TBA_LO_PS, R_00B100_SPI_SHADER_TBA_LO_VS,
- R_00B200_SPI_SHADER_TBA_LO_GS, R_00B300_SPI_SHADER_TBA_LO_ES,
- R_00B400_SPI_SHADER_TBA_LO_HS, R_00B500_SPI_SHADER_TBA_LO_LS};
+ uint32_t regs[] = {R_00B000_SPI_SHADER_TBA_LO_PS, R_00B100_SPI_SHADER_TBA_LO_VS, R_00B200_SPI_SHADER_TBA_LO_GS,
+ R_00B300_SPI_SHADER_TBA_LO_ES, R_00B400_SPI_SHADER_TBA_LO_HS, R_00B500_SPI_SHADER_TBA_LO_LS};
for (i = 0; i < ARRAY_SIZE(regs); ++i) {
radeon_set_sh_reg_seq(cs, regs[i], 4);
if (physical_device->rad_info.gfx_level >= GFX11) {
radeon_set_context_reg(cs, R_028C54_PA_SC_BINNER_CNTL_2, 0);
- radeon_set_context_reg(cs, R_028620_PA_RATE_CNTL,
- S_028620_VERTEX_RATE(2) | S_028620_PRIM_RATE(1));
+ radeon_set_context_reg(cs, R_028620_PA_RATE_CNTL, S_028620_VERTEX_RATE(2) | S_028620_PRIM_RATE(1));
uint64_t rb_mask = BITFIELD64_MASK(physical_device->rad_info.max_render_backends);
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_CONTROL) | EVENT_INDEX(1));
- radeon_emit(cs, PIXEL_PIPE_STATE_CNTL_COUNTER_ID(0) |
- PIXEL_PIPE_STATE_CNTL_STRIDE(2) |
- PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_LO(rb_mask));
+ radeon_emit(cs, PIXEL_PIPE_STATE_CNTL_COUNTER_ID(0) | PIXEL_PIPE_STATE_CNTL_STRIDE(2) |
+ PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_LO(rb_mask));
radeon_emit(cs, PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_HI(rb_mask));
radeon_set_uconfig_reg(cs, R_031110_SPI_GS_THROTTLE_CNTL1, 0x12355123);
*/
bool exclusion = physical_device->rad_info.gfx_level >= GFX7;
radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL,
- S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) |
- S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
+ S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
si_emit_compute(device, cs);
}
radeon_emit(cs, PKT3_NOP_PAD);
}
- VkResult result =
- device->ws->buffer_create(device->ws, cs->cdw * 4, 4096, device->ws->cs_domain(device->ws),
- RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC,
- RADV_BO_PRIORITY_CS, 0, &device->gfx_init);
+ VkResult result = device->ws->buffer_create(
+ device->ws, cs->cdw * 4, 4096, device->ws->cs_domain(device->ws),
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC,
+ RADV_BO_PRIORITY_CS, 0, &device->gfx_init);
if (result != VK_SUCCESS)
goto fail;
VkRect2D ret;
ret.offset.x = MAX2(a->offset.x, b->offset.x);
ret.offset.y = MAX2(a->offset.y, b->offset.y);
- ret.extent.width =
- MIN2(a->offset.x + a->extent.width, b->offset.x + b->extent.width) - ret.offset.x;
- ret.extent.height =
- MIN2(a->offset.y + a->extent.height, b->offset.y + b->extent.height) - ret.offset.y;
+ ret.extent.width = MIN2(a->offset.x + a->extent.width, b->offset.x + b->extent.width) - ret.offset.x;
+ ret.extent.height = MIN2(a->offset.y + a->extent.height, b->offset.y + b->extent.height) - ret.offset.y;
return ret;
}
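
/* A worked example of the intersection above, with invented rectangles
 * (not part of the patch): a = {offset (0,0), extent 100x100} and
 * b = {offset (50,25), extent 100x100} give
 *
 *   ret.offset = (MAX2(0,50), MAX2(0,25))                  = (50, 25)
 *   ret.extent = (MIN2(100,150) - 50, MIN2(100,125) - 25)  = 50 x 75
 *
 * Note the helper assumes the inputs overlap; disjoint rectangles would
 * wrap the unsigned extent.
 */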
void
-si_write_scissors(struct radeon_cmdbuf *cs, int count, const VkRect2D *scissors,
- const VkViewport *viewports)
+si_write_scissors(struct radeon_cmdbuf *cs, int count, const VkRect2D *scissors, const VkViewport *viewports)
{
int i;
VkRect2D viewport_scissor = si_scissor_from_viewport(viewports + i);
VkRect2D scissor = si_intersect_scissor(&scissors[i], &viewport_scissor);
- radeon_emit(cs, S_028250_TL_X(scissor.offset.x) | S_028250_TL_Y(scissor.offset.y) |
- S_028250_WINDOW_OFFSET_DISABLE(1));
+ radeon_emit(
+ cs, S_028250_TL_X(scissor.offset.x) | S_028250_TL_Y(scissor.offset.y) | S_028250_WINDOW_OFFSET_DISABLE(1));
radeon_emit(cs, S_028254_BR_X(scissor.offset.x + scissor.extent.width) |
S_028254_BR_Y(scissor.offset.y + scissor.extent.height));
}
}
void
-si_write_guardband(struct radeon_cmdbuf *cs, int count, const VkViewport *viewports,
- unsigned rast_prim, unsigned polygon_mode, float line_width)
+si_write_guardband(struct radeon_cmdbuf *cs, int count, const VkViewport *viewports, unsigned rast_prim,
+ unsigned polygon_mode, float line_width)
{
- const bool draw_points =
- radv_rast_prim_is_point(rast_prim) || radv_polygon_mode_is_point(polygon_mode);
- const bool draw_lines =
- radv_rast_prim_is_line(rast_prim) || radv_polygon_mode_is_line(polygon_mode);
+ const bool draw_points = radv_rast_prim_is_point(rast_prim) || radv_polygon_mode_is_point(polygon_mode);
+ const bool draw_lines = radv_rast_prim_is_line(rast_prim) || radv_polygon_mode_is_line(polygon_mode);
int i;
float scale[3], translate[3], guardband_x = INFINITY, guardband_y = INFINITY;
float discard_x = 1.0f, discard_y = 1.0f;
};
uint32_t
-si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw,
- bool indirect_draw, bool count_from_stream_output,
- uint32_t draw_vertex_count, unsigned topology, bool prim_restart_enable,
- unsigned patch_control_points, unsigned num_tess_patches)
+si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw, bool indirect_draw,
+ bool count_from_stream_output, uint32_t draw_vertex_count, unsigned topology,
+ bool prim_restart_enable, unsigned patch_control_points, unsigned num_tess_patches)
{
const struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
const unsigned max_primgroup_in_wave = 2;
/* WD_SWITCH_ON_EOP has no effect on GPUs with less than
* 4 shader engines. Set 1 to pass the assertion below.
* The other cases are hardware requirements. */
- if (info->max_se < 4 || topology == V_008958_DI_PT_POLYGON ||
- topology == V_008958_DI_PT_LINELOOP || topology == V_008958_DI_PT_TRIFAN ||
- topology == V_008958_DI_PT_TRISTRIP_ADJ ||
- (prim_restart_enable &&
- (info->family < CHIP_POLARIS10 ||
- (topology != V_008958_DI_PT_POINTLIST && topology != V_008958_DI_PT_LINESTRIP))))
+ if (info->max_se < 4 || topology == V_008958_DI_PT_POLYGON || topology == V_008958_DI_PT_LINELOOP ||
+ topology == V_008958_DI_PT_TRIFAN || topology == V_008958_DI_PT_TRISTRIP_ADJ ||
+ (prim_restart_enable && (info->family < CHIP_POLARIS10 ||
+ (topology != V_008958_DI_PT_POINTLIST && topology != V_008958_DI_PT_LINESTRIP))))
wd_switch_on_eop = true;
/* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0.
(info->family == CHIP_HAWAII ||
(info->gfx_level == GFX8 &&
/* max primgroup in wave is always 2 - leave this for documentation */
- (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_GEOMETRY) ||
- max_primgroup_in_wave != 2))))
+ (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_GEOMETRY) || max_primgroup_in_wave != 2))))
partial_vs_wave = true;
/* Instancing bug on Bonaire. */
/* Workaround for a VGT hang when strip primitive types are used with
* primitive restart.
*/
- if (prim_restart_enable &&
- (topology == V_008958_DI_PT_LINESTRIP || topology == V_008958_DI_PT_TRISTRIP ||
- topology == V_008958_DI_PT_LINESTRIP_ADJ || topology == V_008958_DI_PT_TRISTRIP_ADJ)) {
+ if (prim_restart_enable && (topology == V_008958_DI_PT_LINESTRIP || topology == V_008958_DI_PT_TRISTRIP ||
+ topology == V_008958_DI_PT_LINESTRIP_ADJ || topology == V_008958_DI_PT_TRISTRIP_ADJ)) {
partial_vs_wave = true;
}
return cmd_buffer->state.ia_multi_vgt_param.base | S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) |
S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) | S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
- S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
- S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
+ S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) | S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
S_028AA8_WD_SWITCH_ON_EOP(info->gfx_level >= GFX7 ? wd_switch_on_eop : 0);
}
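
/* Illustration (invented draw state, not from the patch) of how the
 * workarounds above compose: a GFX8 part with 4 SEs drawing an indexed
 * triangle strip with primitive restart enabled takes two of the branches:
 *
 *   - TRISTRIP is neither POINTLIST nor LINESTRIP, so the restart clause
 *     forces wd_switch_on_eop = true;
 *   - the strip-plus-restart VGT hang workaround sets partial_vs_wave = true;
 *
 * and the returned word therefore carries S_028AA8_WD_SWITCH_ON_EOP(1) and
 * S_028AA8_PARTIAL_VS_WAVE_ON(1) on top of the primgroup size.
 */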
void
-si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, bool is_mec,
- unsigned event, unsigned event_flags, unsigned dst_sel,
- unsigned data_sel, uint64_t va, uint32_t new_fence,
+si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, bool is_mec, unsigned event,
+ unsigned event_flags, unsigned dst_sel, unsigned data_sel, uint64_t va, uint32_t new_fence,
uint64_t gfx9_eop_bug_va)
{
- unsigned op = EVENT_TYPE(event) |
- EVENT_INDEX(event == V_028A90_CS_DONE || event == V_028A90_PS_DONE ? 6 : 5) |
- event_flags;
+ unsigned op =
+ EVENT_TYPE(event) | EVENT_INDEX(event == V_028A90_CS_DONE || event == V_028A90_PS_DONE ? 6 : 5) | event_flags;
unsigned is_gfx8_mec = is_mec && gfx_level < GFX9;
unsigned sel = EOP_DST_SEL(dst_sel) | EOP_DATA_SEL(data_sel);
* queue.
*/
if (event == V_028B9C_CS_DONE || event == V_028B9C_PS_DONE) {
- assert(event_flags == 0 && dst_sel == EOP_DST_SEL_MEM &&
- data_sel == EOP_DATA_SEL_VALUE_32BIT);
+ assert(event_flags == 0 && dst_sel == EOP_DST_SEL_MEM && data_sel == EOP_DATA_SEL_VALUE_32BIT);
if (is_mec) {
radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 5, false));
void
radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask)
{
- assert(op == WAIT_REG_MEM_EQUAL || op == WAIT_REG_MEM_NOT_EQUAL ||
- op == WAIT_REG_MEM_GREATER_OR_EQUAL);
+ assert(op == WAIT_REG_MEM_EQUAL || op == WAIT_REG_MEM_NOT_EQUAL || op == WAIT_REG_MEM_GREATER_OR_EQUAL);
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, false));
radeon_emit(cs, op | WAIT_REG_MEM_MEM_SPACE(1));
}
static void
-gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
- uint32_t *flush_cnt, uint64_t flush_va, bool is_mec,
- enum radv_cmd_flush_bits flush_bits, enum rgp_flush_bits *sqtt_flush_bits,
- uint64_t gfx9_eop_bug_va)
+gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, uint32_t *flush_cnt,
+ uint64_t flush_va, bool is_mec, enum radv_cmd_flush_bits flush_bits,
+ enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va)
{
uint32_t gcr_cntl = 0;
unsigned cb_db_event = 0;
unsigned gl2_wb = G_586_GL2_WB(gcr_cntl);
unsigned gcr_seq = G_586_SEQ(gcr_cntl);
- gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV & C_586_GLK_WB & C_586_GLK_INV &
- C_586_GLV_INV & C_586_GL1_INV & C_586_GL2_INV & C_586_GL2_WB; /* keep SEQ */
+ gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV & C_586_GLK_WB & C_586_GLK_INV & C_586_GLV_INV & C_586_GL1_INV &
+ C_586_GL2_INV & C_586_GL2_WB; /* keep SEQ */
/* Send an event that flushes caches. */
radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 6, 0));
- radeon_emit(cs, S_490_EVENT_TYPE(cb_db_event) |
- S_490_EVENT_INDEX(5) |
- S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) | S_490_GLV_INV(glv_inv) |
- S_490_GL1_INV(gl1_inv) | S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) |
- S_490_SEQ(gcr_seq) | S_490_GLK_WB(glk_wb) | S_490_GLK_INV(glk_inv) |
- S_490_PWS_ENABLE(1));
+ radeon_emit(cs, S_490_EVENT_TYPE(cb_db_event) | S_490_EVENT_INDEX(5) | S_490_GLM_WB(glm_wb) |
+ S_490_GLM_INV(glm_inv) | S_490_GLV_INV(glv_inv) | S_490_GL1_INV(gl1_inv) |
+ S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) | S_490_SEQ(gcr_seq) | S_490_GLK_WB(glk_wb) |
+ S_490_GLK_INV(glk_inv) | S_490_PWS_ENABLE(1));
radeon_emit(cs, 0); /* DST_SEL, INT_SEL, DATA_SEL */
radeon_emit(cs, 0); /* ADDRESS_LO */
radeon_emit(cs, 0); /* ADDRESS_HI */
/* Wait for the event and invalidate remaining caches if needed. */
radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 6, 0));
- radeon_emit(cs, S_580_PWS_STAGE_SEL(V_580_CP_PFP) |
- S_580_PWS_COUNTER_SEL(V_580_TS_SELECT) |
- S_580_PWS_ENA2(1) |
- S_580_PWS_COUNT(0));
+ radeon_emit(cs, S_580_PWS_STAGE_SEL(V_580_CP_PFP) | S_580_PWS_COUNTER_SEL(V_580_TS_SELECT) |
+ S_580_PWS_ENA2(1) | S_580_PWS_COUNT(0));
radeon_emit(cs, 0xffffffff); /* GCR_SIZE */
radeon_emit(cs, 0x01ffffff); /* GCR_SIZE_HI */
- radeon_emit(cs, 0); /* GCR_BASE_LO */
- radeon_emit(cs, 0); /* GCR_BASE_HI */
+ radeon_emit(cs, 0); /* GCR_BASE_LO */
+ radeon_emit(cs, 0); /* GCR_BASE_HI */
radeon_emit(cs, S_585_PWS_ENA(1));
radeon_emit(cs, gcr_cntl); /* GCR_CNTL */
unsigned gl2_wb = G_586_GL2_WB(gcr_cntl);
unsigned gcr_seq = G_586_SEQ(gcr_cntl);
- gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV & C_586_GLV_INV & C_586_GL1_INV & C_586_GL2_INV &
- C_586_GL2_WB; /* keep SEQ */
+ gcr_cntl &=
+ C_586_GLM_WB & C_586_GLM_INV & C_586_GLV_INV & C_586_GL1_INV & C_586_GL2_INV & C_586_GL2_WB; /* keep SEQ */
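
/* As a reading aid: per the generated register-header convention, S_586_FOO(x)
 * shifts a value into the FOO field, G_586_FOO(x) extracts it, and C_586_FOO is
 * the complemented field mask, so each AND in the chain above clears exactly
 * one field:
 *
 *   gcr_cntl &= C_586_GLM_WB;   // equivalent to gcr_cntl & ~S_586_GLM_WB(~0)
 *
 * which is why only the bits not named - SEQ here - survive the masking.
 */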
assert(flush_cnt);
(*flush_cnt)++;
- si_cs_emit_write_event_eop(
- cs, gfx_level, false, cb_db_event,
- S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) | S_490_GLV_INV(glv_inv) |
- S_490_GL1_INV(gl1_inv) | S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) |
- S_490_SEQ(gcr_seq),
- EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, flush_va, *flush_cnt, gfx9_eop_bug_va);
+ si_cs_emit_write_event_eop(cs, gfx_level, false, cb_db_event,
+ S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) | S_490_GLV_INV(glv_inv) |
+ S_490_GL1_INV(gl1_inv) | S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) |
+ S_490_SEQ(gcr_seq),
+ EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, flush_va, *flush_cnt, gfx9_eop_bug_va);
radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 0xffffffff);
}
radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
radeon_emit(cs, gcr_cntl); /* GCR_CNTL */
- } else if ((cb_db_event ||
- (flush_bits & (RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH))) &&
+ } else if ((cb_db_event || (flush_bits & (RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH))) &&
!is_mec) {
/* We need to ensure that PFP waits as well. */
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
}
void
-si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs,
- enum amd_gfx_level gfx_level, uint32_t *flush_cnt, uint64_t flush_va,
- bool is_mec, enum radv_cmd_flush_bits flush_bits,
+si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
+ uint32_t *flush_cnt, uint64_t flush_va, bool is_mec, enum radv_cmd_flush_bits flush_bits,
enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va)
{
unsigned cp_coher_cntl = 0;
- uint32_t flush_cb_db =
- flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB);
+ uint32_t flush_cb_db = flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB);
radeon_check_space(ws, cs, 128);
if (gfx_level >= GFX10) {
/* GFX10 cache flush handling is quite different. */
- gfx10_cs_emit_cache_flush(cs, gfx_level, flush_cnt, flush_va, is_mec, flush_bits,
- sqtt_flush_bits, gfx9_eop_bug_va);
+ gfx10_cs_emit_cache_flush(cs, gfx_level, flush_cnt, flush_va, is_mec, flush_bits, sqtt_flush_bits,
+ gfx9_eop_bug_va);
return;
}
if (gfx_level <= GFX8) {
if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
- cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | S_0085F0_CB0_DEST_BASE_ENA(1) |
- S_0085F0_CB1_DEST_BASE_ENA(1) | S_0085F0_CB2_DEST_BASE_ENA(1) |
- S_0085F0_CB3_DEST_BASE_ENA(1) | S_0085F0_CB4_DEST_BASE_ENA(1) |
- S_0085F0_CB5_DEST_BASE_ENA(1) | S_0085F0_CB6_DEST_BASE_ENA(1) |
- S_0085F0_CB7_DEST_BASE_ENA(1);
+ cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | S_0085F0_CB0_DEST_BASE_ENA(1) | S_0085F0_CB1_DEST_BASE_ENA(1) |
+ S_0085F0_CB2_DEST_BASE_ENA(1) | S_0085F0_CB3_DEST_BASE_ENA(1) |
+ S_0085F0_CB4_DEST_BASE_ENA(1) | S_0085F0_CB5_DEST_BASE_ENA(1) |
+ S_0085F0_CB6_DEST_BASE_ENA(1) | S_0085F0_CB7_DEST_BASE_ENA(1);
/* Necessary for DCC */
if (gfx_level >= GFX8) {
- si_cs_emit_write_event_eop(cs, gfx_level, is_mec, V_028A90_FLUSH_AND_INV_CB_DATA_TS, 0,
- EOP_DST_SEL_MEM, EOP_DATA_SEL_DISCARD, 0, 0,
- gfx9_eop_bug_va);
+ si_cs_emit_write_event_eop(cs, gfx_level, is_mec, V_028A90_FLUSH_AND_INV_CB_DATA_TS, 0, EOP_DST_SEL_MEM,
+ EOP_DATA_SEL_DISCARD, 0, 0, gfx9_eop_bug_va);
}
*sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB;
*/
tc_flags = EVENT_TC_ACTION_ENA | EVENT_TC_MD_ACTION_ENA;
- *sqtt_flush_bits |=
- RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB | RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB;
+ *sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB | RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB;
/* Ideally flush TC together with CB/DB. */
if (flush_bits & RADV_CMD_FLAG_INV_L2) {
assert(flush_cnt);
(*flush_cnt)++;
- si_cs_emit_write_event_eop(cs, gfx_level, false, cb_db_event, tc_flags, EOP_DST_SEL_MEM,
- EOP_DATA_SEL_VALUE_32BIT, flush_va, *flush_cnt, gfx9_eop_bug_va);
+ si_cs_emit_write_event_eop(cs, gfx_level, false, cb_db_event, tc_flags, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT,
+ flush_va, *flush_cnt, gfx9_eop_bug_va);
radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 0xffffffff);
}
*sqtt_flush_bits |= RGP_FLUSH_PFP_SYNC_ME;
}
- if ((flush_bits & RADV_CMD_FLAG_INV_L2) ||
- (gfx_level <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) {
+ if ((flush_bits & RADV_CMD_FLAG_INV_L2) || (gfx_level <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) {
si_emit_acquire_mem(cs, is_mec, gfx_level == GFX9,
cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) |
S_0301F0_TC_WB_ACTION_ENA(gfx_level >= GFX8));
*
* WB doesn't work without NC.
*/
- si_emit_acquire_mem(
- cs, is_mec, gfx_level == GFX9,
- cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1));
+ si_emit_acquire_mem(cs, is_mec, gfx_level == GFX9,
+ cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1));
cp_coher_cntl = 0;
*sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2 | RGP_FLUSH_INVAL_VMEM_L0;
}
if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
- si_emit_acquire_mem(cs, is_mec, gfx_level == GFX9,
- cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1));
+ si_emit_acquire_mem(cs, is_mec, gfx_level == GFX9, cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1));
cp_coher_cntl = 0;
*sqtt_flush_bits |= RGP_FLUSH_INVAL_VMEM_L0;
if (is_compute)
cmd_buffer->state.flush_bits &=
- ~(RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META |
- RADV_CMD_FLAG_INV_L2_METADATA | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_VGT_FLUSH |
- RADV_CMD_FLAG_START_PIPELINE_STATS | RADV_CMD_FLAG_STOP_PIPELINE_STATS);
+ ~(RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META | RADV_CMD_FLAG_FLUSH_AND_INV_DB |
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB_META | RADV_CMD_FLAG_INV_L2_METADATA | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_VGT_FLUSH | RADV_CMD_FLAG_START_PIPELINE_STATS |
+ RADV_CMD_FLAG_STOP_PIPELINE_STATS);
if (!cmd_buffer->state.flush_bits) {
radv_describe_barrier_end_delayed(cmd_buffer);
}
si_cs_emit_cache_flush(cmd_buffer->device->ws, cmd_buffer->cs,
- cmd_buffer->device->physical_device->rad_info.gfx_level,
- &cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va,
- radv_cmd_buffer_uses_mec(cmd_buffer), cmd_buffer->state.flush_bits,
+ cmd_buffer->device->physical_device->rad_info.gfx_level, &cmd_buffer->gfx9_fence_idx,
+ cmd_buffer->gfx9_fence_va, radv_cmd_buffer_uses_mec(cmd_buffer), cmd_buffer->state.flush_bits,
&cmd_buffer->state.sqtt_flush_bits, cmd_buffer->gfx9_eop_bug_va);
if (unlikely(cmd_buffer->device->trace_bo))
/* sets the CP predication state using a boolean stored at va */
void
-si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible,
- unsigned pred_op, uint64_t va)
+si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible, unsigned pred_op, uint64_t va)
{
uint32_t op = 0;
static inline unsigned
cp_dma_max_byte_count(enum amd_gfx_level gfx_level)
{
- unsigned max = gfx_level >= GFX11 ? 32767 :
- gfx_level >= GFX9 ? S_415_BYTE_COUNT_GFX9(~0u) : S_415_BYTE_COUNT_GFX6(~0u);
+ unsigned max = gfx_level >= GFX11 ? 32767
+ : gfx_level >= GFX9 ? S_415_BYTE_COUNT_GFX9(~0u)
+ : S_415_BYTE_COUNT_GFX6(~0u);
/* make it aligned for optimal performance */
return max & ~(SI_CPDMA_ALIGNMENT - 1);
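
/* Sketch of the rounding above, assuming SI_CPDMA_ALIGNMENT is a power of
 * two (taken to be 32 here): max & ~(align - 1) drops the low bits, so e.g.
 * the GFX11 limit 32767 becomes 32736, and every maximal CP DMA chunk stays
 * 32-byte aligned.
 */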
* clear value.
*/
static void
-si_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool predicating,
- uint64_t dst_va, uint64_t src_va, unsigned size, unsigned flags)
+si_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool predicating, uint64_t dst_va,
+ uint64_t src_va, unsigned size, unsigned flags)
{
uint32_t header = 0, command = 0;
command |= S_415_RAW_WAIT(1);
/* Src and dst flags. */
- if (device->physical_device->rad_info.gfx_level >= GFX9 && !(flags & CP_DMA_CLEAR) &&
- src_va == dst_va)
+ if (device->physical_device->rad_info.gfx_level >= GFX9 && !(flags & CP_DMA_CLEAR) && src_va == dst_va)
header |= S_411_DST_SEL(V_411_NOWHERE); /* prefetch only */
else if (flags & CP_DMA_USE_L2)
header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
}
static void
-si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t src_va, unsigned size,
- unsigned flags)
+si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t src_va, unsigned size, unsigned flags)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_device *device = cmd_buffer->device;
}
void
-si_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va,
- unsigned size, bool predicating)
+si_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, unsigned size,
+ bool predicating)
{
struct radeon_winsys *ws = device->ws;
enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
radeon_check_space(ws, cs, 9);
uint64_t aligned_va = va & ~(SI_CPDMA_ALIGNMENT - 1);
- uint64_t aligned_size =
- ((va + size + SI_CPDMA_ALIGNMENT - 1) & ~(SI_CPDMA_ALIGNMENT - 1)) - aligned_va;
+ uint64_t aligned_size = ((va + size + SI_CPDMA_ALIGNMENT - 1) & ~(SI_CPDMA_ALIGNMENT - 1)) - aligned_va;
if (gfx_level >= GFX9) {
- command |= S_415_BYTE_COUNT_GFX9(aligned_size) |
- S_415_DISABLE_WR_CONFIRM_GFX9(1);
+ command |= S_415_BYTE_COUNT_GFX9(aligned_size) | S_415_DISABLE_WR_CONFIRM_GFX9(1);
header |= S_411_DST_SEL(V_411_NOWHERE);
} else {
- command |= S_415_BYTE_COUNT_GFX6(aligned_size) |
- S_415_DISABLE_WR_CONFIRM_GFX6(1);
+ command |= S_415_BYTE_COUNT_GFX6(aligned_size) | S_415_DISABLE_WR_CONFIRM_GFX6(1);
header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
}
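
/* Worked example of the VA alignment math above (invented addresses,
 * SI_CPDMA_ALIGNMENT assumed to be 32): for va = 0x1010 and size = 0x30,
 *
 *   aligned_va   = 0x1010 & ~31                          = 0x1000
 *   aligned_size = ((0x1010 + 0x30 + 31) & ~31) - 0x1000 = 0x40
 *
 * i.e. the prefetch covers [0x1000, 0x1040), the smallest aligned window
 * around the requested [0x1010, 0x1040).
 */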
void
si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size)
{
- si_cs_cp_dma_prefetch(cmd_buffer->device, cmd_buffer->cs, va, size,
- cmd_buffer->state.predicating);
+ si_cs_cp_dma_prefetch(cmd_buffer->device, cmd_buffer->cs, va, size, cmd_buffer->state.predicating);
if (unlikely(cmd_buffer->device->trace_bo))
radv_cmd_buffer_trace_emit(cmd_buffer);
}
static void
-si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count, uint64_t remaining_size,
- unsigned *flags)
+si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count, uint64_t remaining_size, unsigned *flags)
{
/* Flush the caches for the first copy only.
}
void
-si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va,
- uint64_t size)
+si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va, uint64_t size)
{
enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level;
uint64_t main_src_va, main_dest_va;
}
void
-si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size,
- unsigned value)
+si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, unsigned value)
{
if (!size)
return;
}
/* For MSAA sample positions. */
-#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \
- ((((unsigned)(s0x)&0xf) << 0) | (((unsigned)(s0y)&0xf) << 4) | (((unsigned)(s1x)&0xf) << 8) | \
- (((unsigned)(s1y)&0xf) << 12) | (((unsigned)(s2x)&0xf) << 16) | \
- (((unsigned)(s2y)&0xf) << 20) | (((unsigned)(s3x)&0xf) << 24) | (((unsigned)(s3y)&0xf) << 28))
+#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \
+ ((((unsigned)(s0x)&0xf) << 0) | (((unsigned)(s0y)&0xf) << 4) | (((unsigned)(s1x)&0xf) << 8) | \
+ (((unsigned)(s1y)&0xf) << 12) | (((unsigned)(s2x)&0xf) << 16) | (((unsigned)(s2y)&0xf) << 20) | \
+ (((unsigned)(s3x)&0xf) << 24) | (((unsigned)(s3y)&0xf) << 28))
/* For obtaining location coordinates from registers */
#define SEXT4(x) ((int)((x) | ((x)&0x8 ? 0xfffffff0 : 0)))
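
/* The two macros are inverses over a single nibble: each coordinate is a
 * signed 4-bit fixed-point value (assumed to be in 1/16-pixel units), so for
 * example
 *
 *   FILL_SREG(-8, -8, ...)  packs sample 0 as low byte 0x88, and
 *   SEXT4(0x8) == -8,  SEXT4(0x7) == 7
 *
 * recovers the signed coordinates from a packed register value.
 */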
}
static void
-radv_get_sample_position(struct radv_device *device, unsigned sample_count, unsigned sample_index,
- float *out_value)
+radv_get_sample_position(struct radv_device *device, unsigned sample_count, unsigned sample_index, float *out_value)
{
const uint32_t *sample_locs;
}
static inline enum pipe_swizzle
-radv_swizzle_conv(VkComponentSwizzle component, const unsigned char chan[4],
- VkComponentSwizzle vk_swiz)
+radv_swizzle_conv(VkComponentSwizzle component, const unsigned char chan[4], VkComponentSwizzle vk_swiz)
{
if (vk_swiz == VK_COMPONENT_SWIZZLE_IDENTITY)
vk_swiz = component;
}
static inline void
-vk_format_compose_swizzles(const VkComponentMapping *mapping, const unsigned char swz[4],
- enum pipe_swizzle dst[4])
+vk_format_compose_swizzles(const VkComponentMapping *mapping, const unsigned char swz[4], enum pipe_swizzle dst[4])
{
dst[0] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_R, swz, mapping->r);
dst[1] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_G, swz, mapping->g);
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo);
static int
-radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, amdgpu_bo_handle bo, uint64_t offset,
- uint64_t size, uint64_t addr, uint32_t bo_flags, uint64_t internal_flags,
- uint32_t ops)
+radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, amdgpu_bo_handle bo, uint64_t offset, uint64_t size, uint64_t addr,
+ uint32_t bo_flags, uint64_t internal_flags, uint32_t ops)
{
uint64_t flags = internal_flags;
if (bo) {
{
if (bo->bo_capacity < bo->range_count) {
uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
- struct radv_amdgpu_winsys_bo **bos =
- realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
+ struct radv_amdgpu_winsys_bo **bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
if (!bos)
return VK_ERROR_OUT_OF_HOST_MEMORY;
bo->bos = bos;
}
static VkResult
-radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_winsys_bo *_parent,
- uint64_t offset, uint64_t size, struct radeon_winsys_bo *_bo,
- uint64_t bo_offset)
+radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_winsys_bo *_parent, uint64_t offset,
+ uint64_t size, struct radeon_winsys_bo *_bo, uint64_t bo_offset)
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
* will first unmap all existing VA that overlap the requested range and then map.
*/
if (bo) {
- r = radv_amdgpu_bo_va_op(ws, bo->bo, bo_offset, size, parent->base.va + offset, 0, 0,
- AMDGPU_VA_OP_REPLACE);
+ r = radv_amdgpu_bo_va_op(ws, bo->bo, bo_offset, size, parent->base.va + offset, 0, 0, AMDGPU_VA_OP_REPLACE);
} else {
- r = radv_amdgpu_bo_va_op(ws, NULL, 0, size, parent->base.va + offset, 0, AMDGPU_VM_PAGE_PRT,
- AMDGPU_VA_OP_REPLACE);
+ r =
+ radv_amdgpu_bo_va_op(ws, NULL, 0, size, parent->base.va + offset, 0, AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_REPLACE);
}
if (r) {
* new parent, or are adjacent to it. This corresponds to the bind ranges
* that may change.
*/
- while (first + 1 < parent->range_count &&
- parent->ranges[first].offset + parent->ranges[first].size < offset)
+ while (first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
++first;
last = first;
/* Try to merge the new range with the first range. */
if (parent->ranges[first].bo == bo &&
- (!bo ||
- offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
+ (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
size += offset - parent->ranges[first].offset;
offset = parent->ranges[first].offset;
bo_offset = parent->ranges[first].bo_offset;
/* Try to merge the new range with the last range. */
if (parent->ranges[last].bo == bo &&
- (!bo ||
- offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
+ (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
size = parent->ranges[last].offset + parent->ranges[last].size - offset;
remove_last = true;
}
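
/* Hypothetical merge case for the checks above: if ranges[last] maps parent
 * range [64, 128) to bo A at bo_offset 0, then a new bind of bo A at
 * bo_offset 32 covering parent [96, 128) satisfies
 *
 *   offset - bo_offset == 96 - 32 == 64 == ranges[last].offset - ranges[last].bo_offset
 *
 * i.e. both describe the same linear mapping, so the new range is folded into
 * the existing one instead of splitting it.
 */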
u_rwlock_wrlock(&ws->global_bo_list.lock);
if (ws->global_bo_list.count == ws->global_bo_list.capacity) {
unsigned capacity = MAX2(4, ws->global_bo_list.capacity * 2);
- void *data =
- realloc(ws->global_bo_list.bos, capacity * sizeof(struct radv_amdgpu_winsys_bo *));
+ void *data = realloc(ws->global_bo_list.bos, capacity * sizeof(struct radv_amdgpu_winsys_bo *));
if (!data) {
u_rwlock_wrunlock(&ws->global_bo_list.lock);
return VK_ERROR_OUT_OF_HOST_MEMORY;
static VkResult
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned alignment,
- enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags,
- unsigned priority, uint64_t replay_address,
- struct radeon_winsys_bo **out_bo)
+ enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags, unsigned priority,
+ uint64_t replay_address, struct radeon_winsys_bo **out_bo)
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
struct radv_amdgpu_winsys_bo *bo;
assert(!replay_address || (flags & RADEON_FLAG_REPLAYABLE));
- const uint64_t va_flags = AMDGPU_VA_RANGE_HIGH |
- (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
+ const uint64_t va_flags = AMDGPU_VA_RANGE_HIGH | (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
(flags & RADEON_FLAG_REPLAYABLE ? AMDGPU_VA_RANGE_REPLAYABLE : 0);
- r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, virt_alignment, replay_address,
- &va, &va_handle, va_flags);
+ r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, virt_alignment, replay_address, &va,
+ &va_handle, va_flags);
if (r) {
- result =
- replay_address ? VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ result = replay_address ? VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
goto error_va_alloc;
}
bo->ranges[0].bo_offset = 0;
/* Reserve a PRT VA region. */
- r = radv_amdgpu_bo_va_op(ws, NULL, 0, size, bo->base.va, 0, AMDGPU_VM_PAGE_PRT,
- AMDGPU_VA_OP_MAP);
+ r = radv_amdgpu_bo_va_op(ws, NULL, 0, size, bo->base.va, 0, AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_MAP);
if (r) {
fprintf(stderr, "radv/amdgpu: Failed to reserve a PRT VA region (%d).\n", r);
result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
}
static uint64_t
-radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws, uint64_t size,
- unsigned alignment)
+radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws, uint64_t size, unsigned alignment)
{
uint64_t vm_alignment = alignment;
}
static VkResult
-radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws, void *pointer, uint64_t size,
- unsigned priority, struct radeon_winsys_bo **out_bo)
+radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws, void *pointer, uint64_t size, unsigned priority,
+ struct radeon_winsys_bo **out_bo)
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
amdgpu_bo_handle buf_handle;
*/
vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size, ws->info.gart_page_size);
- if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, vm_alignment, 0, &va,
- &va_handle, AMDGPU_VA_RANGE_HIGH)) {
+ if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, vm_alignment, 0, &va, &va_handle,
+ AMDGPU_VA_RANGE_HIGH)) {
result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
goto error_va_alloc;
}
}
static VkResult
-radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, int fd, unsigned priority,
- struct radeon_winsys_bo **out_bo, uint64_t *alloc_size)
+radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, int fd, unsigned priority, struct radeon_winsys_bo **out_bo,
+ uint64_t *alloc_size)
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
struct radv_amdgpu_winsys_bo *bo;
*alloc_size = info.alloc_size;
}
- r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, result.alloc_size, 1 << 20, 0,
- &va, &va_handle, AMDGPU_VA_RANGE_HIGH);
+ r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, result.alloc_size, 1 << 20, 0, &va, &va_handle,
+ AMDGPU_VA_RANGE_HIGH);
if (r) {
vk_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
goto error_query;
}
- r =
- radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size, va, 0, 0, AMDGPU_VA_OP_MAP);
+ r = radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size, va, 0, 0, AMDGPU_VA_OP_MAP);
if (r) {
vk_result = VK_ERROR_UNKNOWN;
goto error_va_map;
tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, md->u.gfx9.dcc_offset_256b);
tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, md->u.gfx9.dcc_pitch_max);
tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, md->u.gfx9.dcc_independent_64b_blocks);
- tiling_flags |=
- AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, md->u.gfx9.dcc_independent_128b_blocks);
- tiling_flags |=
- AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, md->u.gfx9.dcc_max_compressed_block_size);
+ tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, md->u.gfx9.dcc_independent_128b_blocks);
+ tiling_flags |= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, md->u.gfx9.dcc_max_compressed_block_size);
tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
} else {
if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
if (md->u.legacy.tile_split)
- tiling_flags |=
- AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
+ tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1);
}
static VkResult
-radv_amdgpu_winsys_bo_make_resident(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
- bool resident)
+radv_amdgpu_winsys_bo_make_resident(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo, bool resident)
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
return bo_a->base.va < bo_b->base.va ? -1 : bo_a->base.va > bo_b->base.va ? 1 : 0;
}
-static uint64_t radv_amdgpu_canonicalize_va(uint64_t va)
+static uint64_t
+radv_amdgpu_canonicalize_va(uint64_t va)
{
/* Would be less hardcoded to use addr32_hi (0xffff8000) to generate a mask,
* but there are confusing differences between page fault reports from kernel where
u_rwlock_rdlock(&ws->log_bo_list_lock);
LIST_FOR_EACH_ENTRY (bo_log, &ws->log_bo_list, list) {
- fprintf(file, "timestamp=%llu, VA=%.16llx-%.16llx, destroyed=%d, is_virtual=%d\n",
- (long long)bo_log->timestamp, (long long)radv_amdgpu_canonicalize_va(bo_log->va),
- (long long)radv_amdgpu_canonicalize_va(bo_log->va + bo_log->size),
- bo_log->destroyed, bo_log->is_virtual);
+ fprintf(file, "timestamp=%llu, VA=%.16llx-%.16llx, destroyed=%d, is_virtual=%d\n", (long long)bo_log->timestamp,
+ (long long)radv_amdgpu_canonicalize_va(bo_log->va),
+ (long long)radv_amdgpu_canonicalize_va(bo_log->va + bo_log->size), bo_log->destroyed, bo_log->is_virtual);
}
u_rwlock_rdunlock(&ws->log_bo_list_lock);
}
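
/* For readers of these dumps: radv_amdgpu_canonicalize_va (reformatted above)
 * conceptually strips the sign-extended upper bits before printing. A minimal
 * sketch of that idea, assuming a 48-bit GPU VA space (the exact mask choice
 * is what the comment on the function debates):
 *
 *   static uint64_t canonicalize_va_sketch(uint64_t va)
 *   {
 *      return va & ((1ull << 48) - 1); // keep the low 48 bits only
 *   }
 */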
qsort(bos, ws->global_bo_list.count, sizeof(bos[0]), radv_amdgpu_bo_va_compare);
for (i = 0; i < ws->global_bo_list.count; ++i) {
- fprintf(file, " VA=%.16llx-%.16llx, handle=%d%s\n",
- (long long)radv_amdgpu_canonicalize_va(bos[i]->base.va),
- (long long)radv_amdgpu_canonicalize_va(bos[i]->base.va + bos[i]->size),
- bos[i]->bo_handle, bos[i]->is_virtual ? " sparse" : "");
+ fprintf(file, " VA=%.16llx-%.16llx, handle=%d%s\n", (long long)radv_amdgpu_canonicalize_va(bos[i]->base.va),
+ (long long)radv_amdgpu_canonicalize_va(bos[i]->base.va + bos[i]->size), bos[i]->bo_handle,
+ bos[i]->is_virtual ? " sparse" : "");
}
free(bos);
u_rwlock_rdunlock(&ws->global_bo_list.lock);
cs->buf[cs->cdw++] = value;
}
-static uint32_t radv_amdgpu_ctx_queue_syncobj(struct radv_amdgpu_ctx *ctx, unsigned ip,
- unsigned ring);
+static uint32_t radv_amdgpu_ctx_queue_syncobj(struct radv_amdgpu_ctx *ctx, unsigned ip, unsigned ring);
static inline struct radv_amdgpu_cs *
radv_amdgpu_cs(struct radeon_cmdbuf *base)
}
static bool
-ring_can_use_ib_bos(const struct radv_amdgpu_winsys *ws,
- enum amd_ip_type ip_type)
+ring_can_use_ib_bos(const struct radv_amdgpu_winsys *ws, enum amd_ip_type ip_type)
{
return ws->use_ib_bos && (ip_type == AMD_IP_GFX || ip_type == AMD_IP_COMPUTE);
}
uint64_t seq_no;
};
-static VkResult radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
- struct radv_amdgpu_cs_request *request,
+static VkResult radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_cs_request *request,
struct radv_winsys_sem_info *sem_info);
static void
const bool avoid_vram = cs->is_secondary && !can_always_use_ib2;
const enum radeon_bo_domain domain = avoid_vram ? RADEON_DOMAIN_GTT : radv_amdgpu_cs_domain(ws);
const enum radeon_bo_flag gtt_wc_flag = avoid_vram ? 0 : RADEON_FLAG_GTT_WC;
- const enum radeon_bo_flag flags = RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_READ_ONLY | gtt_wc_flag;
+ const enum radeon_bo_flag flags =
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | gtt_wc_flag;
- return ws->buffer_create(ws, ib_size, cs->ws->info.ib_alignment, domain, flags,
- RADV_BO_PRIORITY_CS, 0, &cs->ib_buffer);
+ return ws->buffer_create(ws, ib_size, cs->ws->info.ib_alignment, domain, flags, RADV_BO_PRIORITY_CS, 0,
+ &cs->ib_buffer);
}
static struct radeon_cmdbuf *
return &cs->base;
}
-static uint32_t get_nop_packet(struct radv_amdgpu_cs *cs)
+static uint32_t
+get_nop_packet(struct radv_amdgpu_cs *cs)
{
- switch(cs->hw_ip) {
+ switch (cs->hw_ip) {
case AMDGPU_HW_IP_GFX:
case AMDGPU_HW_IP_COMPUTE:
- return cs->ws->info.gfx_ib_pad_with_type2 ? PKT2_NOP_PAD : PKT3_NOP_PAD;
+ return cs->ws->info.gfx_ib_pad_with_type2 ? PKT2_NOP_PAD : PKT3_NOP_PAD;
case AMDGPU_HW_IP_DMA:
return cs->ws->info.gfx_level <= GFX6 ? 0xF0000000 : SDMA_NOP_PAD;
case AMDGPU_HW_IP_UVD:
}
for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
- unsigned hash =
- ((uintptr_t)cs->virtual_buffers[i] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
+ unsigned hash = ((uintptr_t)cs->virtual_buffers[i] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
cs->virtual_buffer_hash_table[hash] = -1;
}
cs->buf[cs->cdw - 4] = PKT3(PKT3_INDIRECT_BUFFER, 2, 0);
cs->buf[cs->cdw - 3] = next_acs->ib.ib_mc_address;
cs->buf[cs->cdw - 2] = next_acs->ib.ib_mc_address >> 32;
- cs->buf[cs->cdw - 1] =
- S_3F2_CHAIN(1) | S_3F2_VALID(1) | S_3F2_PRE_ENA(pre_ena) | next_acs->ib.size;
+ cs->buf[cs->cdw - 1] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | S_3F2_PRE_ENA(pre_ena) | next_acs->ib.size;
return true;
}
}
static void
-radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf *_parent, struct radeon_cmdbuf *_child,
- bool allow_ib2)
+radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf *_parent, struct radeon_cmdbuf *_child, bool allow_ib2)
{
struct radv_amdgpu_cs *parent = radv_amdgpu_cs(_parent);
struct radv_amdgpu_cs *child = radv_amdgpu_cs(_child);
return;
for (unsigned i = 0; i < child->num_buffers; ++i) {
- radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i].bo_handle,
- child->handles[i].bo_priority);
+ radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i].bo_handle, child->handles[i].bo_priority);
}
for (unsigned i = 0; i < child->num_virtual_buffers; ++i) {
}
static unsigned
-radv_amdgpu_add_cs_to_bo_list(struct radv_amdgpu_cs *cs, struct drm_amdgpu_bo_list_entry *handles,
- unsigned num_handles)
+radv_amdgpu_add_cs_to_bo_list(struct radv_amdgpu_cs *cs, struct drm_amdgpu_bo_list_entry *handles, unsigned num_handles)
{
if (!cs->num_buffers)
return num_handles;
}
static unsigned
-radv_amdgpu_copy_global_bo_list(struct radv_amdgpu_winsys *ws,
- struct drm_amdgpu_bo_list_entry *handles)
+radv_amdgpu_copy_global_bo_list(struct radv_amdgpu_winsys *ws, struct drm_amdgpu_bo_list_entry *handles)
{
for (uint32_t i = 0; i < ws->global_bo_list.count; i++) {
handles[i].bo_handle = ws->global_bo_list.bos[i]->bo_handle;
}
static VkResult
-radv_amdgpu_get_bo_list(struct radv_amdgpu_winsys *ws, struct radeon_cmdbuf **cs_array,
- unsigned count, struct radeon_cmdbuf **initial_preamble_array,
- unsigned num_initial_preambles,
- struct radeon_cmdbuf **continue_preamble_array,
- unsigned num_continue_preambles, struct radeon_cmdbuf **postamble_array,
- unsigned num_postambles, unsigned *rnum_handles,
+radv_amdgpu_get_bo_list(struct radv_amdgpu_winsys *ws, struct radeon_cmdbuf **cs_array, unsigned count,
+ struct radeon_cmdbuf **initial_preamble_array, unsigned num_initial_preambles,
+ struct radeon_cmdbuf **continue_preamble_array, unsigned num_continue_preambles,
+ struct radeon_cmdbuf **postamble_array, unsigned num_postambles, unsigned *rnum_handles,
struct drm_amdgpu_bo_list_entry **rhandles)
{
struct drm_amdgpu_bo_list_entry *handles = NULL;
num_handles = radv_amdgpu_copy_global_bo_list(ws, handles);
} else if (count == 1 && !num_initial_preambles && !num_continue_preambles && !num_postambles &&
- !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers &&
- !radv_amdgpu_cs(cs_array[0])->chained_to && !ws->global_bo_list.count) {
+ !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers && !radv_amdgpu_cs(cs_array[0])->chained_to &&
+ !ws->global_bo_list.count) {
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)cs_array[0];
if (cs->num_buffers == 0)
return VK_SUCCESS;
} else {
unsigned total_buffer_count = ws->global_bo_list.count;
total_buffer_count += radv_amdgpu_count_cs_array_bo(cs_array, count);
- total_buffer_count +=
- radv_amdgpu_count_cs_array_bo(initial_preamble_array, num_initial_preambles);
- total_buffer_count +=
- radv_amdgpu_count_cs_array_bo(continue_preamble_array, num_continue_preambles);
+ total_buffer_count += radv_amdgpu_count_cs_array_bo(initial_preamble_array, num_initial_preambles);
+ total_buffer_count += radv_amdgpu_count_cs_array_bo(continue_preamble_array, num_continue_preambles);
total_buffer_count += radv_amdgpu_count_cs_array_bo(postamble_array, num_postambles);
if (total_buffer_count == 0)
num_handles = radv_amdgpu_copy_global_bo_list(ws, handles);
num_handles = radv_amdgpu_add_cs_array_to_bo_list(cs_array, count, handles, num_handles);
- num_handles = radv_amdgpu_add_cs_array_to_bo_list(
- initial_preamble_array, num_initial_preambles, handles, num_handles);
- num_handles = radv_amdgpu_add_cs_array_to_bo_list(
- continue_preamble_array, num_continue_preambles, handles, num_handles);
num_handles =
- radv_amdgpu_add_cs_array_to_bo_list(postamble_array, num_postambles, handles, num_handles);
+ radv_amdgpu_add_cs_array_to_bo_list(initial_preamble_array, num_initial_preambles, handles, num_handles);
+ num_handles =
+ radv_amdgpu_add_cs_array_to_bo_list(continue_preamble_array, num_continue_preambles, handles, num_handles);
+ num_handles = radv_amdgpu_add_cs_array_to_bo_list(postamble_array, num_postambles, handles, num_handles);
}
*rhandles = handles;
static void
radv_assign_last_submit(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_cs_request *request)
{
- radv_amdgpu_request_to_fence(ctx, &ctx->last_submission[request->ip_type][request->ring],
- request);
+ radv_amdgpu_request_to_fence(ctx, &ctx->last_submission[request->ip_type][request->ring], request);
}
static VkResult
-radv_amdgpu_winsys_cs_submit_internal(
- struct radv_amdgpu_ctx *ctx, int queue_idx, struct radv_winsys_sem_info *sem_info,
- struct radeon_cmdbuf **cs_array, unsigned cs_count, struct radeon_cmdbuf **initial_preamble_cs,
- unsigned initial_preamble_count, struct radeon_cmdbuf **continue_preamble_cs,
- unsigned continue_preamble_count, struct radeon_cmdbuf **postamble_cs, unsigned postamble_count,
- bool uses_shadow_regs)
+radv_amdgpu_winsys_cs_submit_internal(struct radv_amdgpu_ctx *ctx, int queue_idx, struct radv_winsys_sem_info *sem_info,
+ struct radeon_cmdbuf **cs_array, unsigned cs_count,
+ struct radeon_cmdbuf **initial_preamble_cs, unsigned initial_preamble_count,
+ struct radeon_cmdbuf **continue_preamble_cs, unsigned continue_preamble_count,
+ struct radeon_cmdbuf **postamble_cs, unsigned postamble_count,
+ bool uses_shadow_regs)
{
VkResult result;
struct radv_amdgpu_winsys *ws = last_cs->ws;
assert(cs_count);
- const unsigned num_pre_post_cs =
- MAX2(initial_preamble_count, continue_preamble_count) + postamble_count;
+ const unsigned num_pre_post_cs = MAX2(initial_preamble_count, continue_preamble_count) + postamble_count;
const unsigned ib_array_size = MIN2(RADV_MAX_IBS_PER_SUBMIT, num_pre_post_cs + cs_count);
STACK_ARRAY(struct radv_amdgpu_cs_ib_info, ibs, ib_array_size);
u_rwlock_rdlock(&ws->global_bo_list.lock);
- result = radv_amdgpu_get_bo_list(
- ws, &cs_array[0], cs_count, initial_preamble_cs, initial_preamble_count, continue_preamble_cs,
- continue_preamble_count, postamble_cs, postamble_count, &num_handles, &handles);
+ result = radv_amdgpu_get_bo_list(ws, &cs_array[0], cs_count, initial_preamble_cs, initial_preamble_count,
+ continue_preamble_cs, continue_preamble_count, postamble_cs, postamble_count,
+ &num_handles, &handles);
if (result != VK_SUCCESS)
goto fail;
if (cs_ib_idx == 0) {
/* Make sure the whole CS fits into the same submission. */
unsigned cs_num_ib = cs->use_ib ? 1 : cs->num_old_ib_buffers;
- if (i + cs_num_ib > ib_per_submit ||
- ibs_per_ip[cs->hw_ip] + cs_num_ib > max_ib_per_ip[cs->hw_ip])
+ if (i + cs_num_ib > ib_per_submit || ibs_per_ip[cs->hw_ip] + cs_num_ib > max_ib_per_ip[cs->hw_ip])
break;
if (cs->hw_ip != request.ip_type) {
for (unsigned i = 0; i < sem_info->wait.timeline_syncobj_count; ++i) {
int fd2;
ret = amdgpu_cs_syncobj_export_sync_file2(
- ctx->ws->dev, sem_info->wait.syncobj[i + sem_info->wait.syncobj_count],
- sem_info->wait.points[i], 0, &fd2);
+ ctx->ws->dev, sem_info->wait.syncobj[i + sem_info->wait.syncobj_count], sem_info->wait.points[i], 0, &fd2);
if (ret < 0) {
/* This works around a kernel bug where the fence isn't copied if it is already
* signalled. Since it is already signalled it is totally fine to not wait on it.
*
* kernel patch: https://patchwork.freedesktop.org/patch/465583/ */
uint64_t point;
- ret = amdgpu_cs_syncobj_query2(
- ctx->ws->dev, &sem_info->wait.syncobj[i + sem_info->wait.syncobj_count], &point, 1,
- 0);
+ ret = amdgpu_cs_syncobj_query2(ctx->ws->dev, &sem_info->wait.syncobj[i + sem_info->wait.syncobj_count],
+ &point, 1, 0);
if (!ret && point >= sem_info->wait.points[i])
continue;
}
}
for (unsigned i = 0; i < sem_info->signal.timeline_syncobj_count; ++i) {
- ret = amdgpu_cs_syncobj_transfer(ctx->ws->dev,
- sem_info->signal.syncobj[i + sem_info->signal.syncobj_count],
+ ret = amdgpu_cs_syncobj_transfer(ctx->ws->dev, sem_info->signal.syncobj[i + sem_info->signal.syncobj_count],
sem_info->signal.points[i], queue_syncobj, 0, 0);
if (ret < 0)
return VK_ERROR_DEVICE_LOST;
}
static VkResult
-radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
- const struct radv_winsys_submit_info *submit, uint32_t wait_count,
- const struct vk_sync_wait *waits, uint32_t signal_count,
+radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, const struct radv_winsys_submit_info *submit,
+ uint32_t wait_count, const struct vk_sync_wait *waits, uint32_t signal_count,
const struct vk_sync_signal *signals)
{
struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
result = radv_amdgpu_cs_submit_zero(ctx, submit->ip_type, submit->queue_index, &sem_info);
} else {
result = radv_amdgpu_winsys_cs_submit_internal(
- ctx, submit->queue_index, &sem_info, submit->cs_array, submit->cs_count,
- submit->initial_preamble_cs, submit->initial_preamble_count, submit->continue_preamble_cs,
- submit->continue_preamble_count, submit->postamble_cs, submit->postamble_count,
- submit->uses_shadow_regs);
+ ctx, submit->queue_index, &sem_info, submit->cs_array, submit->cs_count, submit->initial_preamble_cs,
+ submit->initial_preamble_count, submit->continue_preamble_cs, submit->continue_preamble_count,
+ submit->postamble_cs, submit->postamble_count, submit->uses_shadow_regs);
}
out:
for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) {
struct radv_amdgpu_winsys_bo *bo;
- bo = (struct radv_amdgpu_winsys_bo *)(i == cs->num_old_ib_buffers ? cs->ib_buffer
- : cs->old_ib_buffers[i].bo);
+ bo = (struct radv_amdgpu_winsys_bo *)(i == cs->num_old_ib_buffers ? cs->ib_buffer : cs->old_ib_buffers[i].bo);
if (addr >= bo->base.va && addr - bo->base.va < bo->size) {
if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0)
return (char *)ret + (addr - bo->base.va);
}
static void
-radv_amdgpu_winsys_cs_dump(struct radeon_cmdbuf *_cs, FILE *file, const int *trace_ids,
- int trace_id_count)
+radv_amdgpu_winsys_cs_dump(struct radeon_cmdbuf *_cs, FILE *file, const int *trace_ids, int trace_id_count)
{
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
void *ib = radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address);
int num_dw = cs->base.cdw;
assert(ib);
- ac_parse_ib(file, ib, num_dw, trace_ids, trace_id_count, "main IB", cs->ws->info.gfx_level,
- cs->ws->info.family, radv_amdgpu_winsys_get_cpu_addr, cs);
+ ac_parse_ib(file, ib, num_dw, trace_ids, trace_id_count, "main IB", cs->ws->info.gfx_level, cs->ws->info.family,
+ radv_amdgpu_winsys_get_cpu_addr, cs);
}
static uint32_t
}
static VkResult
-radv_amdgpu_ctx_create(struct radeon_winsys *_ws, enum radeon_ctx_priority priority,
- struct radeon_winsys_ctx **rctx)
+radv_amdgpu_ctx_create(struct radeon_winsys *_ws, enum radeon_ctx_priority priority, struct radeon_winsys_ctx **rctx)
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
struct radv_amdgpu_ctx *ctx = CALLOC_STRUCT(radv_amdgpu_ctx);
assert(AMDGPU_HW_IP_NUM * MAX_RINGS_PER_TYPE * 4 * sizeof(uint64_t) <= 4096);
result = ws->base.buffer_create(&ws->base, 4096, 8, RADEON_DOMAIN_GTT,
- RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
- RADV_BO_PRIORITY_CS, 0, &ctx->fence_bo);
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING, RADV_BO_PRIORITY_CS, 0,
+ &ctx->fence_bo);
if (result != VK_SUCCESS) {
goto fail_alloc;
}
if (ctx->last_submission[ip_type][ring_index].fence.fence) {
uint32_t expired;
- int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index].fence,
- 1000000000ull, 0, &expired);
+ int ret =
+ amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index].fence, 1000000000ull, 0, &expired);
if (ret || !expired)
return false;
struct drm_amdgpu_cs_chunk *chunk, int chunk_id)
{
unsigned count = counts->syncobj_count + (queue_syncobj ? 1 : 0);
- struct drm_amdgpu_cs_chunk_sem *syncobj =
- malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * count);
+ struct drm_amdgpu_cs_chunk_sem *syncobj = malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * count);
if (!syncobj)
return NULL;
}
static void *
-radv_amdgpu_cs_alloc_timeline_syncobj_chunk(struct radv_winsys_sem_counts *counts,
- uint32_t queue_syncobj,
+radv_amdgpu_cs_alloc_timeline_syncobj_chunk(struct radv_winsys_sem_counts *counts, uint32_t queue_syncobj,
struct drm_amdgpu_cs_chunk *chunk, int chunk_id)
{
- uint32_t count =
- counts->syncobj_count + counts->timeline_syncobj_count + (queue_syncobj ? 1 : 0);
- struct drm_amdgpu_cs_chunk_syncobj *syncobj =
- malloc(sizeof(struct drm_amdgpu_cs_chunk_syncobj) * count);
+ uint32_t count = counts->syncobj_count + counts->timeline_syncobj_count + (queue_syncobj ? 1 : 0);
+ struct drm_amdgpu_cs_chunk_syncobj *syncobj = malloc(sizeof(struct drm_amdgpu_cs_chunk_syncobj) * count);
if (!syncobj)
return NULL;
static bool
radv_amdgpu_cs_has_user_fence(struct radv_amdgpu_cs_request *request)
{
- return request->ip_type != AMDGPU_HW_IP_UVD &&
- request->ip_type != AMDGPU_HW_IP_VCE &&
- request->ip_type != AMDGPU_HW_IP_UVD_ENC &&
- request->ip_type != AMDGPU_HW_IP_VCN_DEC &&
- request->ip_type != AMDGPU_HW_IP_VCN_ENC &&
- request->ip_type != AMDGPU_HW_IP_VCN_JPEG;
+ return request->ip_type != AMDGPU_HW_IP_UVD && request->ip_type != AMDGPU_HW_IP_VCE &&
+ request->ip_type != AMDGPU_HW_IP_UVD_ENC && request->ip_type != AMDGPU_HW_IP_VCN_DEC &&
+ request->ip_type != AMDGPU_HW_IP_VCN_ENC && request->ip_type != AMDGPU_HW_IP_VCN_JPEG;
}
static VkResult
amdgpu_cs_chunk_fence_info_to_data(&fence_info, &chunk_data[i]);
}
- if (sem_info->cs_emit_wait && (sem_info->wait.timeline_syncobj_count ||
- sem_info->wait.syncobj_count || *queue_syncobj_wait)) {
+ if (sem_info->cs_emit_wait &&
+ (sem_info->wait.timeline_syncobj_count || sem_info->wait.syncobj_count || *queue_syncobj_wait)) {
if (ctx->ws->info.has_timeline_syncobj) {
- wait_syncobj = radv_amdgpu_cs_alloc_timeline_syncobj_chunk(
- &sem_info->wait, queue_syncobj, &chunks[num_chunks],
- AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT);
+ wait_syncobj = radv_amdgpu_cs_alloc_timeline_syncobj_chunk(&sem_info->wait, queue_syncobj, &chunks[num_chunks],
+ AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT);
} else {
- wait_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(
- &sem_info->wait, queue_syncobj, &chunks[num_chunks], AMDGPU_CHUNK_ID_SYNCOBJ_IN);
+ wait_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->wait, queue_syncobj, &chunks[num_chunks],
+ AMDGPU_CHUNK_ID_SYNCOBJ_IN);
}
if (!wait_syncobj) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
if (sem_info->cs_emit_signal) {
if (ctx->ws->info.has_timeline_syncobj) {
signal_syncobj = radv_amdgpu_cs_alloc_timeline_syncobj_chunk(
- &sem_info->signal, queue_syncobj, &chunks[num_chunks],
- AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL);
+ &sem_info->signal, queue_syncobj, &chunks[num_chunks], AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL);
} else {
- signal_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(
- &sem_info->signal, queue_syncobj, &chunks[num_chunks], AMDGPU_CHUNK_ID_SYNCOBJ_OUT);
+ signal_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->signal, queue_syncobj, &chunks[num_chunks],
+ AMDGPU_CHUNK_ID_SYNCOBJ_OUT);
}
if (!signal_syncobj) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
* IN THE SOFTWARE.
*/
-#include "util/bitset.h"
#include "radv_amdgpu_surface.h"
+#include "util/bitset.h"
#include "radv_amdgpu_winsys.h"
#include "radv_private.h"
#include "sid.h"
config.is_1d = type == RADEON_SURF_TYPE_1D || type == RADEON_SURF_TYPE_1D_ARRAY;
config.is_3d = type == RADEON_SURF_TYPE_3D;
config.is_cube = type == RADEON_SURF_TYPE_CUBEMAP;
- config.is_array = type == RADEON_SURF_TYPE_1D_ARRAY ||
- type == RADEON_SURF_TYPE_2D_ARRAY;
+ config.is_array = type == RADEON_SURF_TYPE_1D_ARRAY || type == RADEON_SURF_TYPE_2D_ARRAY;
return ac_compute_surface(ws->addrlib, &ws->info, &config, mode, surf);
}
amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &heap);
return heap.heap_usage;
case RADEON_VRAM_VIS_USAGE:
- amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
- &heap);
+ amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &heap);
return heap.heap_usage;
case RADEON_GTT_USAGE:
amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &heap);
}
static bool
-radv_amdgpu_winsys_read_registers(struct radeon_winsys *rws, unsigned reg_offset,
- unsigned num_registers, uint32_t *out)
+radv_amdgpu_winsys_read_registers(struct radeon_winsys *rws, unsigned reg_offset, unsigned num_registers, uint32_t *out)
{
struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys *)rws;
/* Check that options don't differ from the existing winsys. */
if (((debug_flags & RADV_DEBUG_ALL_BOS) && !ws->debug_all_bos) ||
((debug_flags & RADV_DEBUG_HANG) && !ws->debug_log_bos) ||
- ((debug_flags & RADV_DEBUG_NO_IBS) && ws->use_ib_bos) ||
- (perftest_flags != ws->perftest)) {
+ ((debug_flags & RADV_DEBUG_NO_IBS) && ws->use_ib_bos) || (perftest_flags != ws->perftest)) {
fprintf(stderr, "radv/amdgpu: Found options that differ from the existing winsys.\n");
return NULL;
}
#ifndef RADV_AMDGPU_WINSYS_PUBLIC_H
#define RADV_AMDGPU_WINSYS_PUBLIC_H
-struct radeon_winsys *radv_amdgpu_winsys_create(int fd, uint64_t debug_flags,
- uint64_t perftest_flags,
+struct radeon_winsys *radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
bool reserve_vmid);
struct radeon_winsys *radv_dummy_winsys_create(void);
static VkResult
radv_null_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned alignment,
- enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags,
- unsigned priority, uint64_t address, struct radeon_winsys_bo **out_bo)
+ enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags, unsigned priority,
+ uint64_t address, struct radeon_winsys_bo **out_bo)
{
struct radv_null_winsys_bo *bo;
}
static VkResult
-radv_null_winsys_bo_make_resident(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
- bool resident)
+radv_null_winsys_bo_make_resident(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo, bool resident)
{
return VK_SUCCESS;
}
}
static VkResult
-radv_null_ctx_create(struct radeon_winsys *_ws, enum radeon_ctx_priority priority,
- struct radeon_winsys_ctx **rctx)
+radv_null_ctx_create(struct radeon_winsys *_ws, enum radeon_ctx_priority priority, struct radeon_winsys_ctx **rctx)
{
struct radv_null_ctx *ctx = CALLOC_STRUCT(radv_null_ctx);
: info->gfx_level >= GFX7 ? 64 * 1024
: 32 * 1024;
info->lds_encode_granularity = info->gfx_level >= GFX7 ? 128 * 4 : 64 * 4;
- info->lds_alloc_granularity =
- info->gfx_level >= GFX10_3 ? 256 * 4 : info->lds_encode_granularity;
+ info->lds_alloc_granularity = info->gfx_level >= GFX10_3 ? 256 * 4 : info->lds_encode_granularity;
info->max_render_backends = gpu_info[info->family].num_render_backends;
info->has_dedicated_vram = gpu_info[info->family].has_dedicated_vram;
info->has_packed_math_16bit = info->gfx_level >= GFX9;
- info->has_image_load_dcc_bug =
- info->family == CHIP_NAVI23 || info->family == CHIP_VANGOGH;
+ info->has_image_load_dcc_bug = info->family == CHIP_NAVI23 || info->family == CHIP_VANGOGH;
info->has_accelerated_dot_product =
- info->family == CHIP_VEGA20 ||
- (info->family >= CHIP_MI100 && info->family != CHIP_NAVI10);
+ info->family == CHIP_VEGA20 || (info->family >= CHIP_MI100 && info->family != CHIP_NAVI10);
info->address32_hi = info->gfx_level >= GFX9 ? 0xffff8000u : 0x0;
info->has_rbplus = info->family == CHIP_STONEY || info->gfx_level >= GFX9;
info->rbplus_allowed =
- info->has_rbplus &&
- (info->family == CHIP_STONEY || info->family == CHIP_VEGA12 || info->family == CHIP_RAVEN ||
- info->family == CHIP_RAVEN2 || info->family == CHIP_RENOIR || info->gfx_level >= GFX10_3);
+ info->has_rbplus && (info->family == CHIP_STONEY || info->family == CHIP_VEGA12 || info->family == CHIP_RAVEN ||
+ info->family == CHIP_RAVEN2 || info->family == CHIP_RENOIR || info->gfx_level >= GFX10_3);
info->has_scheduled_fence_dependency = true;
info->has_gang_submit = true;