From 1952fd8d2ce90586bc20c0f24593b00604eb1592 Mon Sep 17 00:00:00 2001
From: Danylo Piliaiev
Date: Fri, 5 Oct 2018 17:54:07 +0300
Subject: [PATCH] anv: Implement VK_EXT_conditional_rendering for gen 7.5+

Conditional rendering affects the following functions:
- vkCmdDraw, vkCmdDrawIndexed, vkCmdDrawIndirect, vkCmdDrawIndexedIndirect
- vkCmdDrawIndirectCountKHR, vkCmdDrawIndexedIndirectCountKHR
- vkCmdDispatch, vkCmdDispatchIndirect, vkCmdDispatchBase
- vkCmdClearAttachments

The value from the conditional buffer is cached in a designated register;
MI_PREDICATE is emitted every time conditional rendering is enabled and a
command requires it.

v2: by Jason Ekstrand
  - Use vk_find_struct_const instead of manually looping
  - Move draw count loading to the prepare function
  - Zero the top 32 bits of MI_ALU_REG15

v3: Apply a pipeline flush before accessing the conditional buffer
    (the issue was found by Samuel Iglesias)

v4:
  - Remove support for Haswell due to a possible hardware bug
  - Add TMP_REG_PREDICATE and TMP_REG_DRAW_COUNT defines so the registers
    are defined in one place.

v5: thanks to Jason Ekstrand and Lionel Landwerlin
  - Work around the fact that MI_PREDICATE_RESULT is not accessible on
    Haswell by manually calculating MI_PREDICATE_RESULT and re-emitting
    MI_PREDICATE when necessary.

v6: suggested by Lionel Landwerlin
  - Instead of calculating the result of the predicate once, re-emit
    MI_PREDICATE to make it easier to investigate error states.

v7: suggested by Jason
  - Make anv_pipe_invalidate_bits_for_access_flags add CS_STALL if
    VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT is set.

v8: suggested by Lionel
  - Precompute the conditional predicate's result to support secondary
    command buffers.
  - Make prepare_for_draw_count_predicate more readable.

Signed-off-by: Danylo Piliaiev
Reviewed-by: Lionel Landwerlin
Reviewed-by: Jason Ekstrand
---
 src/intel/vulkan/anv_blorp.c       |   8 +-
 src/intel/vulkan/anv_cmd_buffer.c  |   8 ++
 src/intel/vulkan/anv_device.c      |  12 ++
 src/intel/vulkan/anv_extensions.py |   1 +
 src/intel/vulkan/anv_genX.h        |   2 +
 src/intel/vulkan/anv_private.h     |  12 ++
 src/intel/vulkan/genX_cmd_buffer.c | 236 +++++++++++++++++++++++++++++++++++--
 7 files changed, 265 insertions(+), 14 deletions(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 67e8d5a..827f224 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1144,8 +1144,12 @@ void anv_CmdClearAttachments(
     * trash our depth and stencil buffers.
    */
    struct blorp_batch batch;
-   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
-                    BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
+   enum blorp_batch_flags flags = BLORP_BATCH_NO_EMIT_DEPTH_STENCIL;
+   if (cmd_buffer->state.conditional_render_enabled) {
+      anv_cmd_emit_conditional_render_predicate(cmd_buffer);
+      flags |= BLORP_BATCH_PREDICATE_ENABLE;
+   }
+   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, flags);
 
    for (uint32_t a = 0; a < attachmentCount; ++a) {
       if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 67e777b..7b3a3a4 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -377,6 +377,14 @@ anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
                                      level, base_layer, layer_count);
 }
 
+void
+anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer)
+{
+   anv_genX_call(&cmd_buffer->device->info,
+                 cmd_emit_conditional_render_predicate,
+                 cmd_buffer);
+}
+
 void anv_CmdBindPipeline(
     VkCommandBuffer                             commandBuffer,
     VkPipelineBindPoint                         pipelineBindPoint,
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 45b1ca8..9d8a982 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -916,6 +916,18 @@ void anv_GetPhysicalDeviceFeatures2(
          break;
       }
 
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
+         VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
+            (VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
+         ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
+
+         features->conditionalRendering = pdevice->info.gen >= 8 ||
+                                          pdevice->info.is_haswell;
+         features->inheritedConditionalRendering = pdevice->info.gen >= 8 ||
+                                                   pdevice->info.is_haswell;
+         break;
+      }
+
       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: {
          VkPhysicalDeviceMultiviewFeatures *features =
            (VkPhysicalDeviceMultiviewFeatures *)ext;
diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py
index 79aef13..eb69264 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -118,6 +118,7 @@ EXTENSIONS = [
     Extension('VK_KHR_xlib_surface',              6, 'VK_USE_PLATFORM_XLIB_KHR'),
     Extension('VK_EXT_acquire_xlib_display',      1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
     Extension('VK_EXT_calibrated_timestamps',     1, True),
+    Extension('VK_EXT_conditional_rendering',     1, 'device->info.gen >= 8 || device->info.is_haswell'),
     Extension('VK_EXT_debug_report',              8, True),
     Extension('VK_EXT_direct_mode_display',       1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
     Extension('VK_EXT_display_control',           1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index 7921e06..8fd32ca 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -66,6 +66,8 @@ void genX(cmd_buffer_mark_image_written)(struct anv_cmd_buffer *cmd_buffer,
                                          uint32_t base_layer,
                                          uint32_t layer_count);
 
+void genX(cmd_emit_conditional_render_predicate)(struct anv_cmd_buffer *cmd_buffer);
+
 void genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
                           const struct gen_l3_config *l3_config,
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index ca10995..0299feb 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -183,6 +183,11 @@ struct gen_l3_config;
 #define ANV_SVGS_VB_INDEX      MAX_VBS
 #define ANV_DRAWID_VB_INDEX    (MAX_VBS + 1)
 
+/* We reserve this MI ALU register for the purpose of handling predication.
+ * Other code which uses the MI ALU should leave it alone.
+ */
+#define ANV_PREDICATE_RESULT_REG MI_ALU_REG15
+
 #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
 
 static inline uint32_t
@@ -1870,6 +1875,9 @@ anv_pipe_invalidate_bits_for_access_flags(VkAccessFlags flags)
       case VK_ACCESS_MEMORY_WRITE_BIT:
          pipe_bits |= ANV_PIPE_FLUSH_BITS;
          break;
+      case VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT:
+         pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+         break;
       default:
          break; /* Nothing to do */
       }
@@ -2104,6 +2112,8 @@ struct anv_cmd_state {
     */
    bool                                         hiz_enabled;
 
+   bool                                         conditional_render_enabled;
+
    /**
     * Array length is anv_cmd_state::pass::attachment_count. Array content is
    * valid only when recording a render pass instance.
@@ -2261,6 +2271,8 @@ anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
 
 void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
 
+void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);
+
 enum anv_fence_type {
    ANV_FENCE_TYPE_NONE = 0,
    ANV_FENCE_TYPE_BO,
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 6c45acd..e69c169 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -479,8 +479,9 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
                         0, 0, 1, hiz_op);
 }
 
-#define MI_PREDICATE_SRC0  0x2400
-#define MI_PREDICATE_SRC1  0x2408
+#define MI_PREDICATE_SRC0    0x2400
+#define MI_PREDICATE_SRC1    0x2408
+#define MI_PREDICATE_RESULT  0x2418
 
 static void
 set_image_compressed_bit(struct anv_cmd_buffer *cmd_buffer,
@@ -1411,6 +1412,19 @@ genX(BeginCommandBuffer)(
       cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS;
    }
 
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
+      const VkCommandBufferInheritanceConditionalRenderingInfoEXT *conditional_rendering_info =
+         vk_find_struct_const(pBeginInfo->pInheritanceInfo->pNext,
+                              COMMAND_BUFFER_INHERITANCE_CONDITIONAL_RENDERING_INFO_EXT);
+
+      /* If secondary buffer supports conditional rendering
+       * we should emit commands as if conditional rendering is enabled.
+       */
+      cmd_buffer->state.conditional_render_enabled =
+         conditional_rendering_info &&
+         conditional_rendering_info->conditionalRenderingEnable;
+   }
+#endif
+
    return result;
 }
 
@@ -1515,6 +1529,19 @@ genX(CmdExecuteCommands)(
       assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
       assert(!anv_batch_has_error(&secondary->batch));
 
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+      if (secondary->state.conditional_render_enabled) {
+         if (!primary->state.conditional_render_enabled) {
+            /* Secondary buffer is constructed as if it will be executed
+             * with conditional rendering, we should satisfy this dependency
+             * regardless of conditional rendering being enabled in primary.
+             */
+            emit_lri(&primary->batch, CS_GPR(ANV_PREDICATE_RESULT_REG), UINT32_MAX);
+            emit_lri(&primary->batch, CS_GPR(ANV_PREDICATE_RESULT_REG) + 4, UINT32_MAX);
+         }
+      }
+#endif
+
       if (secondary->usage_flags &
           VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
          /* If we're continuing a render pass from the primary, we need to
@@ -2777,6 +2804,9 @@ void genX(CmdDraw)(
 
    genX(cmd_buffer_flush_state)(cmd_buffer);
 
+   if (cmd_buffer->state.conditional_render_enabled)
+      genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
+
    if (vs_prog_data->uses_firstvertex ||
        vs_prog_data->uses_baseinstance)
       emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance);
@@ -2789,6 +2819,7 @@ void genX(CmdDraw)(
    instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass);
 
    anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
+      prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
       prim.VertexAccessType = SEQUENTIAL;
       prim.PrimitiveTopologyType = pipeline->topology;
       prim.VertexCountPerInstance = vertexCount;
@@ -2818,6 +2849,9 @@ void genX(CmdDrawIndexed)(
 
    genX(cmd_buffer_flush_state)(cmd_buffer);
 
+   if (cmd_buffer->state.conditional_render_enabled)
+      genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
+
    if (vs_prog_data->uses_firstvertex ||
        vs_prog_data->uses_baseinstance)
       emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance);
@@ -2830,6 +2864,7 @@ void genX(CmdDrawIndexed)(
    instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass);
 
    anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
+      prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
       prim.VertexAccessType = RANDOM;
       prim.PrimitiveTopologyType = pipeline->topology;
       prim.VertexCountPerInstance = indexCount;
@@ -2954,6 +2989,9 @@ void genX(CmdDrawIndirect)(
 
    genX(cmd_buffer_flush_state)(cmd_buffer);
 
+   if (cmd_buffer->state.conditional_render_enabled)
+      genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
+
    for (uint32_t i = 0; i < drawCount; i++) {
       struct anv_address draw = anv_address_add(buffer->address, offset);
 
@@ -2967,6 +3005,7 @@ void genX(CmdDrawIndirect)(
 
       anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
          prim.IndirectParameterEnable = true;
+         prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
         prim.VertexAccessType = SEQUENTIAL;
         prim.PrimitiveTopologyType = pipeline->topology;
       }
@@ -2994,6 +3033,9 @@ void genX(CmdDrawIndexedIndirect)(
 
    genX(cmd_buffer_flush_state)(cmd_buffer);
 
+   if (cmd_buffer->state.conditional_render_enabled)
+      genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
+
    for (uint32_t i = 0; i < drawCount; i++) {
       struct anv_address draw = anv_address_add(buffer->address, offset);
 
@@ -3008,6 +3050,7 @@ void genX(CmdDrawIndexedIndirect)(
 
      anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
         prim.IndirectParameterEnable = true;
+         prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
        prim.VertexAccessType = RANDOM;
        prim.PrimitiveTopologyType = pipeline->topology;
     }
@@ -3018,17 +3061,27 @@ void genX(CmdDrawIndexedIndirect)(
    cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES;
 }
 
+#define TMP_DRAW_COUNT_REG MI_ALU_REG14
+
 static void
 prepare_for_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
-                                 struct anv_address count_address)
+                                 struct anv_address count_address,
+                                 const bool conditional_render_enabled)
 {
-   /* Upload the current draw count from the draw parameters buffer to
-    * MI_PREDICATE_SRC0.
-    */
-   emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0, count_address);
-   emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0);
+   if (conditional_render_enabled) {
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+      emit_lrm(&cmd_buffer->batch, CS_GPR(TMP_DRAW_COUNT_REG), count_address);
+      emit_lri(&cmd_buffer->batch, CS_GPR(TMP_DRAW_COUNT_REG) + 4, 0);
+#endif
+   } else {
+      /* Upload the current draw count from the draw parameters buffer to
+       * MI_PREDICATE_SRC0.
+       */
+      emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0, count_address);
+      emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0);
 
-   emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
+      emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
+   }
 }
 
 static void
@@ -3060,6 +3113,54 @@ emit_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
    }
 }
 
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+static void
+emit_draw_count_predicate_with_conditional_render(
+                         struct anv_cmd_buffer *cmd_buffer,
+                         uint32_t draw_index)
+{
+   const int draw_index_reg = MI_ALU_REG0;
+   const int tmp_result_reg = MI_ALU_REG1;
+
+   emit_lri(&cmd_buffer->batch, CS_GPR(draw_index_reg), draw_index);
+   emit_lri(&cmd_buffer->batch, CS_GPR(draw_index_reg) + 4, 0);
+
+   uint32_t *dw;
+   /* Compute (draw_index < draw_count).
+    * We do this by subtracting and storing the carry bit.
+    */
+   dw = anv_batch_emitn(&cmd_buffer->batch, 9, GENX(MI_MATH));
+   dw[1] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, draw_index_reg);
+   dw[2] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, TMP_DRAW_COUNT_REG);
+   dw[3] = mi_alu(MI_ALU_SUB, 0, 0);
+   dw[4] = mi_alu(MI_ALU_STORE, tmp_result_reg, MI_ALU_CF);
+   /* & condition */
+   dw[5] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, tmp_result_reg);
+   dw[6] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, ANV_PREDICATE_RESULT_REG);
+   dw[7] = mi_alu(MI_ALU_AND, 0, 0);
+   dw[8] = mi_alu(MI_ALU_STORE, tmp_result_reg, MI_ALU_ACCU);
+
+#if GEN_GEN >= 8
+   emit_lrr(&cmd_buffer->batch, MI_PREDICATE_RESULT, CS_GPR(tmp_result_reg));
+#else
+   /* MI_PREDICATE_RESULT is not whitelisted in i915 command parser
+    * so we emit MI_PREDICATE to set it.
+    */
+
+   emit_lrr(&cmd_buffer->batch, MI_PREDICATE_SRC0, CS_GPR(tmp_result_reg));
+   emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0);
+   emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1, 0);
+   emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
+
+   anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
+      mip.LoadOperation    = LOAD_LOADINV;
+      mip.CombineOperation = COMBINE_SET;
+      mip.CompareOperation = COMPARE_SRCS_EQUAL;
+   }
+#endif
+}
+#endif
+
 void genX(CmdDrawIndirectCountKHR)(
     VkCommandBuffer                             commandBuffer,
     VkBuffer                                    _buffer,
@@ -3084,12 +3185,21 @@ void genX(CmdDrawIndirectCountKHR)(
    struct anv_address count_address =
       anv_address_add(count_buffer->address, countBufferOffset);
 
-   prepare_for_draw_count_predicate(cmd_buffer, count_address);
+   prepare_for_draw_count_predicate(cmd_buffer, count_address,
+                                    cmd_state->conditional_render_enabled);
 
    for (uint32_t i = 0; i < maxDrawCount; i++) {
       struct anv_address draw = anv_address_add(buffer->address, offset);
 
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+      if (cmd_state->conditional_render_enabled) {
+         emit_draw_count_predicate_with_conditional_render(cmd_buffer, i);
+      } else {
+         emit_draw_count_predicate(cmd_buffer, i);
+      }
+#else
       emit_draw_count_predicate(cmd_buffer, i);
+#endif
 
       if (vs_prog_data->uses_firstvertex ||
           vs_prog_data->uses_baseinstance)
@@ -3136,12 +3246,21 @@ void genX(CmdDrawIndexedIndirectCountKHR)(
    struct anv_address count_address =
       anv_address_add(count_buffer->address, countBufferOffset);
 
-   prepare_for_draw_count_predicate(cmd_buffer, count_address);
+   prepare_for_draw_count_predicate(cmd_buffer, count_address,
+                                    cmd_state->conditional_render_enabled);
 
    for (uint32_t i = 0; i < maxDrawCount; i++) {
       struct anv_address draw = anv_address_add(buffer->address, offset);
 
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+      if (cmd_state->conditional_render_enabled) {
+         emit_draw_count_predicate_with_conditional_render(cmd_buffer, i);
+      } else {
+         emit_draw_count_predicate(cmd_buffer, i);
+      }
+#else
       emit_draw_count_predicate(cmd_buffer, i);
+#endif
 
       /* TODO: We need to stomp base vertex to 0 somehow */
       if (vs_prog_data->uses_firstvertex ||
@@ -3369,7 +3488,11 @@ void genX(CmdDispatchBase)(
 
    genX(cmd_buffer_flush_compute_state)(cmd_buffer);
 
+   if (cmd_buffer->state.conditional_render_enabled)
+      genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
+
    anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) {
+      ggw.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
       ggw.SIMDSize = prog_data->simd_size / 16;
       ggw.ThreadDepthCounterMaximum = 0;
       ggw.ThreadHeightCounterMaximum = 0;
@@ -3463,11 +3586,28 @@ void genX(CmdDispatchIndirect)(
       mip.CombineOperation = COMBINE_OR;
       mip.CompareOperation = COMPARE_FALSE;
    }
+
+#if GEN_IS_HASWELL
+   if (cmd_buffer->state.conditional_render_enabled) {
+      emit_lrr(batch, MI_PREDICATE_SRC0, CS_GPR(ANV_PREDICATE_RESULT_REG));
+      /* predicate &= !(conditional_rendering_predicate == 0); */
+      anv_batch_emit(batch, GENX(MI_PREDICATE), mip) {
+         mip.LoadOperation    = LOAD_LOADINV;
+         mip.CombineOperation = COMBINE_AND;
+         mip.CompareOperation = COMPARE_SRCS_EQUAL;
+      }
+   }
+#endif
+
+#else /* GEN_GEN > 7 */
+   if (cmd_buffer->state.conditional_render_enabled)
+      genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
 #endif
 
    anv_batch_emit(batch, GENX(GPGPU_WALKER), ggw) {
       ggw.IndirectParameterEnable = true;
-      ggw.PredicateEnable = GEN_GEN <= 7;
+      ggw.PredicateEnable = GEN_GEN <= 7 ||
+                            cmd_buffer->state.conditional_render_enabled;
       ggw.SIMDSize = prog_data->simd_size / 16;
       ggw.ThreadDepthCounterMaximum = 0;
       ggw.ThreadHeightCounterMaximum = 0;
@@ -4423,3 +4563,75 @@ void genX(CmdEndRenderPass2KHR)(
 {
    genX(CmdEndRenderPass)(commandBuffer);
 }
+
+void
+genX(cmd_emit_conditional_render_predicate)(struct anv_cmd_buffer *cmd_buffer)
+{
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+   emit_lrr(&cmd_buffer->batch, MI_PREDICATE_SRC0, CS_GPR(ANV_PREDICATE_RESULT_REG));
+   emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0);
+   emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1, 0);
+   emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
+
+   anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
+      mip.LoadOperation    = LOAD_LOADINV;
+      mip.CombineOperation = COMBINE_SET;
+      mip.CompareOperation = COMPARE_SRCS_EQUAL;
+   }
+#endif
+}
+
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+void genX(CmdBeginConditionalRenderingEXT)(
+   VkCommandBuffer                             commandBuffer,
+   const VkConditionalRenderingBeginInfoEXT*   pConditionalRenderingBegin)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_buffer, buffer, pConditionalRenderingBegin->buffer);
+   struct anv_cmd_state *cmd_state = &cmd_buffer->state;
+   struct anv_address value_address =
+      anv_address_add(buffer->address, pConditionalRenderingBegin->offset);
+
+   const bool isInverted = pConditionalRenderingBegin->flags &
+                           VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
+
+   cmd_state->conditional_render_enabled = true;
+
+   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
+   /* Section 19.4 of the Vulkan 1.1.85 spec says:
+    *
+    *    If the value of the predicate in buffer memory changes
+    *    while conditional rendering is active, the rendering commands
+    *    may be discarded in an implementation-dependent way.
+    *    Some implementations may latch the value of the predicate
+    *    upon beginning conditional rendering while others
+    *    may read it before every rendering command.
+    *
+    * So it's perfectly fine to read a value from the buffer once.
+    */
+   emit_lrm(&cmd_buffer->batch, CS_GPR(MI_ALU_REG0), value_address);
+   /* Zero the top 32-bits of MI_PREDICATE_SRC0 */
+   emit_lri(&cmd_buffer->batch, CS_GPR(MI_ALU_REG0) + 4, 0);
+
+   /* Precompute predicate result, it is necessary to support secondary
+    * command buffers since it is unknown if conditional rendering is
+    * inverted when populating them.
+    */
+   uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH));
+   dw[1] = mi_alu(MI_ALU_LOAD0, MI_ALU_SRCA, 0);
+   dw[2] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, MI_ALU_REG0);
+   dw[3] = mi_alu(MI_ALU_SUB, 0, 0);
+   dw[4] = mi_alu(isInverted ? MI_ALU_STOREINV : MI_ALU_STORE,
+                  ANV_PREDICATE_RESULT_REG, MI_ALU_CF);
+}
+
+void genX(CmdEndConditionalRenderingEXT)(
+   VkCommandBuffer                             commandBuffer)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   struct anv_cmd_state *cmd_state = &cmd_buffer->state;
+
+   cmd_state->conditional_render_enabled = false;
+}
+#endif
-- 
2.7.4
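
For context (not part of the patch itself): a minimal sketch of how an application
would exercise the new entry points once VK_EXT_conditional_rendering is advertised.
The helper name and all handles below are hypothetical, the extension entry points
are assumed to have been resolved through the loader or vkGetDeviceProcAddr, and a
graphics pipeline is assumed to be bound on the command buffer before the draw.

#include <vulkan/vulkan.h>

/* Record a conditionally rendered draw.  The GPU reads a 32-bit predicate
 * from 'predicate_buffer' at offset 0; when it is zero (and the inverted
 * flag is not set) the commands inside the conditional block are discarded.
 */
static void
record_conditional_draw(VkCommandBuffer cmd_buf, VkBuffer predicate_buffer,
                        uint32_t vertex_count)
{
   const VkConditionalRenderingBeginInfoEXT begin_info = {
      .sType  = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT,
      .buffer = predicate_buffer,
      .offset = 0,
      .flags  = 0, /* or VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT */
   };

   vkCmdBeginConditionalRenderingEXT(cmd_buf, &begin_info);
   /* Draws, dispatches and vkCmdClearAttachments recorded here are the
    * commands this patch predicates in the driver.
    */
   vkCmdDraw(cmd_buf, vertex_count, 1, 0, 0);
   vkCmdEndConditionalRenderingEXT(cmd_buf);
}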