From a1f7e7d93e9861381e20e8f975f6ed56492a2a02 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Sun, 30 Jul 2023 10:34:43 +0300 Subject: [PATCH] anv: move all dynamic state emission to cmd_buffer_flush_dynamic_state MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Lionel Landwerlin Reviewed-by: Tapani Pälli Part-of: --- src/intel/vulkan/genX_cmd_buffer.c | 393 ------------------------------------- src/intel/vulkan/gfx8_cmd_buffer.c | 392 ++++++++++++++++++++++++++++++++++++ 2 files changed, 392 insertions(+), 393 deletions(-) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index dd8481b..2dc8f13 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -33,8 +33,6 @@ #include "genxml/gen_macros.h" #include "genxml/genX_pack.h" #include "genxml/genX_rt_pack.h" -#include "common/intel_guardband.h" -#include "compiler/brw_prim.h" #include "common/intel_genX_state.h" #include "ds/intel_tracepoints.h" @@ -2873,277 +2871,6 @@ cmd_buffer_flush_mesh_inline_data(struct anv_cmd_buffer *cmd_buffer, } #endif -static void -cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer) -{ - const struct vk_dynamic_graphics_state *dyn = - &cmd_buffer->vk.dynamic_graphics_state; - - if (!(cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) && - !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) && - !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE) && - !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT) && - !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_POLYGON_MODE) && - !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX)) - return; - - struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; - - anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_CLIP), - pipeline->gfx8.clip, clip) { - /* Take dynamic primitive topology in to account with - * 3DSTATE_CLIP::ViewportXYClipTestEnable - */ - const VkPolygonMode dynamic_raster_mode = - genX(raster_polygon_mode)(pipeline, - dyn->rs.polygon_mode, - dyn->ia.primitive_topology); - const bool xy_clip_test_enable = - (dynamic_raster_mode == VK_POLYGON_MODE_FILL); - - clip.APIMode = dyn->vp.depth_clip_negative_one_to_one ? - APIMODE_OGL : APIMODE_D3D; - clip.ViewportXYClipTestEnable = xy_clip_test_enable; - - ANV_SETUP_PROVOKING_VERTEX(clip, dyn->rs.provoking_vertex); - - /* TODO(mesh): Multiview. */ - if (anv_pipeline_is_primitive(pipeline)) { - const struct brw_vue_prog_data *last = - anv_pipeline_get_last_vue_prog_data(pipeline); - if (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT) { - clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ? - dyn->vp.viewport_count - 1 : 0; - } - } else if (anv_pipeline_is_mesh(pipeline)) { - const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline); - if (mesh_prog_data->map.start_dw[VARYING_SLOT_VIEWPORT] >= 0) { - clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ? - dyn->vp.viewport_count - 1 : 0; - } - } - } -} - -static void -cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_instance *instance = cmd_buffer->device->physical->instance; - struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx; - const struct vk_dynamic_graphics_state *dyn = - &cmd_buffer->vk.dynamic_graphics_state; - uint32_t count = dyn->vp.viewport_count; - const VkViewport *viewports = dyn->vp.viewports; - struct anv_state sf_clip_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64); - - const float scale = dyn->vp.depth_clip_negative_one_to_one ? 0.5f : 1.0f; - - for (uint32_t i = 0; i < count; i++) { - const VkViewport *vp = &viewports[i]; - - /* The gfx7 state struct has just the matrix and guardband fields, the - * gfx8 struct adds the min/max viewport fields. */ - struct GENX(SF_CLIP_VIEWPORT) sfv = { - .ViewportMatrixElementm00 = vp->width / 2, - .ViewportMatrixElementm11 = vp->height / 2, - .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) * scale, - .ViewportMatrixElementm30 = vp->x + vp->width / 2, - .ViewportMatrixElementm31 = vp->y + vp->height / 2, - .ViewportMatrixElementm32 = dyn->vp.depth_clip_negative_one_to_one ? - (vp->minDepth + vp->maxDepth) * scale : vp->minDepth, - .XMinClipGuardband = -1.0f, - .XMaxClipGuardband = 1.0f, - .YMinClipGuardband = -1.0f, - .YMaxClipGuardband = 1.0f, - .XMinViewPort = vp->x, - .XMaxViewPort = vp->x + vp->width - 1, - .YMinViewPort = MIN2(vp->y, vp->y + vp->height), - .YMaxViewPort = MAX2(vp->y, vp->y + vp->height) - 1, - }; - - /* Fix depth test misrenderings by lowering translated depth range */ - if (instance->lower_depth_range_rate != 1.0f) - sfv.ViewportMatrixElementm32 *= instance->lower_depth_range_rate; - - const uint32_t fb_size_max = 1 << 14; - uint32_t x_min = 0, x_max = fb_size_max; - uint32_t y_min = 0, y_max = fb_size_max; - - /* If we have a valid renderArea, include that */ - if (gfx->render_area.extent.width > 0 && - gfx->render_area.extent.height > 0) { - x_min = MAX2(x_min, gfx->render_area.offset.x); - x_max = MIN2(x_max, gfx->render_area.offset.x + - gfx->render_area.extent.width); - y_min = MAX2(y_min, gfx->render_area.offset.y); - y_max = MIN2(y_max, gfx->render_area.offset.y + - gfx->render_area.extent.height); - } - - /* The client is required to have enough scissors for whatever it sets - * as ViewportIndex but it's possible that they've got more viewports - * set from a previous command. Also, from the Vulkan 1.3.207: - * - * "The application must ensure (using scissor if necessary) that - * all rendering is contained within the render area." - * - * If the client doesn't set a scissor, that basically means it - * guarantees everything is in-bounds already. If we end up using a - * guardband of [-1, 1] in that case, there shouldn't be much loss. - * It's theoretically possible that they could do all their clipping - * with clip planes but that'd be a bit odd. - */ - if (i < dyn->vp.scissor_count) { - const VkRect2D *scissor = &dyn->vp.scissors[i]; - x_min = MAX2(x_min, scissor->offset.x); - x_max = MIN2(x_max, scissor->offset.x + scissor->extent.width); - y_min = MAX2(y_min, scissor->offset.y); - y_max = MIN2(y_max, scissor->offset.y + scissor->extent.height); - } - - /* Only bother calculating the guardband if our known render area is - * less than the maximum size. Otherwise, it will calculate [-1, 1] - * anyway but possibly with precision loss. - */ - if (x_min > 0 || x_max < fb_size_max || - y_min > 0 || y_max < fb_size_max) { - intel_calculate_guardband_size(x_min, x_max, y_min, y_max, - sfv.ViewportMatrixElementm00, - sfv.ViewportMatrixElementm11, - sfv.ViewportMatrixElementm30, - sfv.ViewportMatrixElementm31, - &sfv.XMinClipGuardband, - &sfv.XMaxClipGuardband, - &sfv.YMinClipGuardband, - &sfv.YMaxClipGuardband); - } - - GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, &sfv); - } - - anv_batch_emit(&cmd_buffer->batch, - GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) { - clip.SFClipViewportPointer = sf_clip_state.offset; - } -} - -static void -cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer) -{ - const struct vk_dynamic_graphics_state *dyn = - &cmd_buffer->vk.dynamic_graphics_state; - uint32_t count = dyn->vp.viewport_count; - const VkViewport *viewports = dyn->vp.viewports; - struct anv_state cc_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); - - for (uint32_t i = 0; i < count; i++) { - const VkViewport *vp = &viewports[i]; - - /* From the Vulkan spec: - * - * "It is valid for minDepth to be greater than or equal to - * maxDepth." - */ - float min_depth = MIN2(vp->minDepth, vp->maxDepth); - float max_depth = MAX2(vp->minDepth, vp->maxDepth); - - struct GENX(CC_VIEWPORT) cc_viewport = { - .MinimumDepth = dyn->rs.depth_clamp_enable ? min_depth : 0.0f, - .MaximumDepth = dyn->rs.depth_clamp_enable ? max_depth : 1.0f, - }; - - GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport); - } - - anv_batch_emit(&cmd_buffer->batch, - GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) { - cc.CCViewportPointer = cc_state.offset; - } -} - -static void -cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx; - const struct vk_dynamic_graphics_state *dyn = - &cmd_buffer->vk.dynamic_graphics_state; - uint32_t count = dyn->vp.scissor_count; - const VkRect2D *scissors = dyn->vp.scissors; - const VkViewport *viewports = dyn->vp.viewports; - - /* Wa_1409725701: - * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is - * stored as an array of up to 16 elements. The location of first - * element of the array, as specified by Pointer to SCISSOR_RECT, should - * be aligned to a 64-byte boundary. - */ - uint32_t alignment = 64; - struct anv_state scissor_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, alignment); - - for (uint32_t i = 0; i < count; i++) { - const VkRect2D *s = &scissors[i]; - const VkViewport *vp = &viewports[i]; - - /* Since xmax and ymax are inclusive, we have to have xmax < xmin or - * ymax < ymin for empty clips. In case clip x, y, width height are all - * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't - * what we want. Just special case empty clips and produce a canonical - * empty clip. */ - static const struct GENX(SCISSOR_RECT) empty_scissor = { - .ScissorRectangleYMin = 1, - .ScissorRectangleXMin = 1, - .ScissorRectangleYMax = 0, - .ScissorRectangleXMax = 0 - }; - - const int max = 0xffff; - - uint32_t y_min = MAX2(s->offset.y, MIN2(vp->y, vp->y + vp->height)); - uint32_t x_min = MAX2(s->offset.x, vp->x); - int64_t y_max = MIN2(s->offset.y + s->extent.height - 1, - MAX2(vp->y, vp->y + vp->height) - 1); - int64_t x_max = MIN2(s->offset.x + s->extent.width - 1, - vp->x + vp->width - 1); - - y_max = CLAMP(y_max, 0, INT16_MAX >> 1); - x_max = CLAMP(x_max, 0, INT16_MAX >> 1); - - /* Do this math using int64_t so overflow gets clamped correctly. */ - if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { - y_min = CLAMP((uint64_t) y_min, gfx->render_area.offset.y, max); - x_min = CLAMP((uint64_t) x_min, gfx->render_area.offset.x, max); - y_max = CLAMP((uint64_t) y_max, 0, - gfx->render_area.offset.y + - gfx->render_area.extent.height - 1); - x_max = CLAMP((uint64_t) x_max, 0, - gfx->render_area.offset.x + - gfx->render_area.extent.width - 1); - } - - const struct GENX(SCISSOR_RECT) scissor = { - .ScissorRectangleYMin = y_min, - .ScissorRectangleXMin = x_min, - .ScissorRectangleYMax = y_max, - .ScissorRectangleXMax = x_max - }; - - if (s->extent.width <= 0 || s->extent.height <= 0) { - GENX(SCISSOR_RECT_pack)(NULL, scissor_state.map + i * 8, - &empty_scissor); - } else { - GENX(SCISSOR_RECT_pack)(NULL, scissor_state.map + i * 8, &scissor); - } - } - - anv_batch_emit(&cmd_buffer->batch, - GENX(3DSTATE_SCISSOR_STATE_POINTERS), ssp) { - ssp.ScissorRectPointer = scissor_state.offset; - } -} - ALWAYS_INLINE void genX(batch_emit_pipe_control)(struct anv_batch *batch, const struct intel_device_info *devinfo, @@ -3260,91 +2987,6 @@ genX(cmd_buffer_set_preemption)(struct anv_cmd_buffer *cmd_buffer, bool value) #endif } -static void -genX(streamout_prologue)(struct anv_cmd_buffer *cmd_buffer) -{ -#if GFX_VERx10 >= 120 - /* Wa_16013994831 - Disable preemption during streamout, enable back - * again if XFB not used by the current pipeline. - * - * Although this workaround applies to Gfx12+, we already disable object - * level preemption for another reason in genX_state.c so we can skip this - * for Gfx12. - */ - if (!intel_needs_workaround(cmd_buffer->device->info, 16013994831)) - return; - - if (cmd_buffer->state.gfx.pipeline->uses_xfb) { - genX(cmd_buffer_set_preemption)(cmd_buffer, false); - return; - } - - if (!cmd_buffer->state.gfx.object_preemption) - genX(cmd_buffer_set_preemption)(cmd_buffer, true); -#endif -} - -static void -cmd_buffer_emit_streamout(struct anv_cmd_buffer *cmd_buffer) -{ - const struct vk_dynamic_graphics_state *dyn = - &cmd_buffer->vk.dynamic_graphics_state; - struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; - - genX(streamout_prologue)(cmd_buffer); - - anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT), - pipeline->gfx8.streamout_state, so) { - so.RenderingDisable = dyn->rs.rasterizer_discard_enable; - so.RenderStreamSelect = dyn->rs.rasterization_stream; -#if INTEL_NEEDS_WA_18022508906 - /* Wa_18022508906 : - * - * SKL PRMs, Volume 7: 3D-Media-GPGPU, Stream Output Logic (SOL) Stage: - * - * SOL_INT::Render_Enable = - * (3DSTATE_STREAMOUT::Force_Rending == Force_On) || - * ( - * (3DSTATE_STREAMOUT::Force_Rending != Force_Off) && - * !(3DSTATE_GS::Enable && 3DSTATE_GS::Output Vertex Size == 0) && - * !3DSTATE_STREAMOUT::API_Render_Disable && - * ( - * 3DSTATE_DEPTH_STENCIL_STATE::Stencil_TestEnable || - * 3DSTATE_DEPTH_STENCIL_STATE::Depth_TestEnable || - * 3DSTATE_DEPTH_STENCIL_STATE::Depth_WriteEnable || - * 3DSTATE_PS_EXTRA::PS_Valid || - * 3DSTATE_WM::Legacy Depth_Buffer_Clear || - * 3DSTATE_WM::Legacy Depth_Buffer_Resolve_Enable || - * 3DSTATE_WM::Legacy Hierarchical_Depth_Buffer_Resolve_Enable - * ) - * ) - * - * If SOL_INT::Render_Enable is false, the SO stage will not forward any - * topologies down the pipeline. Which is not what we want for occlusion - * queries. - * - * Here we force rendering to get SOL_INT::Render_Enable when occlusion - * queries are active. - */ - if (!so.RenderingDisable && cmd_buffer->state.gfx.n_occlusion_queries > 0) - so.ForceRendering = Force_on; -#endif - - switch (dyn->rs.provoking_vertex) { - case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT: - so.ReorderMode = LEADING; - break; - - case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT: - so.ReorderMode = TRAILING; - break; - - default: - unreachable("Invalid provoking vertex mode"); - } - } -} - ALWAYS_INLINE static void genX(emit_hs)(struct anv_cmd_buffer *cmd_buffer) { @@ -3632,41 +3274,6 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer) dirty & VK_SHADER_STAGE_ALL_GRAPHICS); } - cmd_buffer_emit_clip(cmd_buffer); - - if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE | - ANV_CMD_DIRTY_XFB_ENABLE)) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZATION_STREAM) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX)) - cmd_buffer_emit_streamout(cmd_buffer); - - if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE | - ANV_CMD_DIRTY_RENDER_TARGETS)) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE)) { - cmd_buffer_emit_viewport(cmd_buffer); - cmd_buffer_emit_depth_viewport(cmd_buffer); - cmd_buffer_emit_scissor(cmd_buffer); - } - - if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY)) { - uint32_t topology; - if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) - topology = _3DPRIM_PATCHLIST(dyn->ts.patch_control_points); - else - topology = genX(vk_to_intel_primitive_type)[dyn->ia.primitive_topology]; - - cmd_buffer->state.gfx.primitive_topology = topology; - - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) { - vft.PrimitiveTopologyType = topology; - } - } - if (any_dynamic_state_dirty || cmd_buffer->state.gfx.dirty) genX(cmd_buffer_flush_dynamic_state)(cmd_buffer); } diff --git a/src/intel/vulkan/gfx8_cmd_buffer.c b/src/intel/vulkan/gfx8_cmd_buffer.c index 7de7f41..172c596 100644 --- a/src/intel/vulkan/gfx8_cmd_buffer.c +++ b/src/intel/vulkan/gfx8_cmd_buffer.c @@ -31,6 +31,8 @@ #include "genxml/gen_macros.h" #include "genxml/genX_pack.h" +#include "common/intel_guardband.h" +#include "compiler/brw_prim.h" void genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable) @@ -488,6 +490,361 @@ genX(rasterization_mode)(VkPolygonMode raster_mode, } } +static void +cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer) +{ + const struct vk_dynamic_graphics_state *dyn = + &cmd_buffer->vk.dynamic_graphics_state; + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; + + if (!(cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) && + !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) && + !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE) && + !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT) && + !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_POLYGON_MODE) && + !BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX)) + return; + + anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_CLIP), + pipeline->gfx8.clip, clip) { + /* Take dynamic primitive topology in to account with + * 3DSTATE_CLIP::ViewportXYClipTestEnable + */ + const VkPolygonMode dynamic_raster_mode = + genX(raster_polygon_mode)(pipeline, + dyn->rs.polygon_mode, + dyn->ia.primitive_topology); + const bool xy_clip_test_enable = + (dynamic_raster_mode == VK_POLYGON_MODE_FILL); + + clip.APIMode = dyn->vp.depth_clip_negative_one_to_one ? + APIMODE_OGL : APIMODE_D3D; + clip.ViewportXYClipTestEnable = xy_clip_test_enable; + + ANV_SETUP_PROVOKING_VERTEX(clip, dyn->rs.provoking_vertex); + + /* TODO(mesh): Multiview. */ + if (anv_pipeline_is_primitive(pipeline)) { + const struct brw_vue_prog_data *last = + anv_pipeline_get_last_vue_prog_data(pipeline); + if (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT) { + clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ? + dyn->vp.viewport_count - 1 : 0; + } + } else if (anv_pipeline_is_mesh(pipeline)) { + const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline); + if (mesh_prog_data->map.start_dw[VARYING_SLOT_VIEWPORT] >= 0) { + clip.MaximumVPIndex = dyn->vp.viewport_count > 0 ? + dyn->vp.viewport_count - 1 : 0; + } + } + } +} + +static void +genX(streamout_prologue)(struct anv_cmd_buffer *cmd_buffer) +{ +#if GFX_VERx10 >= 120 + /* Wa_16013994831 - Disable preemption during streamout, enable back + * again if XFB not used by the current pipeline. + * + * Although this workaround applies to Gfx12+, we already disable object + * level preemption for another reason in genX_state.c so we can skip this + * for Gfx12. + */ + if (!intel_needs_workaround(cmd_buffer->device->info, 16013994831)) + return; + + if (cmd_buffer->state.gfx.pipeline->uses_xfb) { + genX(cmd_buffer_set_preemption)(cmd_buffer, false); + return; + } + + if (!cmd_buffer->state.gfx.object_preemption) + genX(cmd_buffer_set_preemption)(cmd_buffer, true); +#endif +} + +static void +cmd_buffer_emit_streamout(struct anv_cmd_buffer *cmd_buffer) +{ + const struct vk_dynamic_graphics_state *dyn = + &cmd_buffer->vk.dynamic_graphics_state; + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; + + genX(streamout_prologue)(cmd_buffer); + + anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT), + pipeline->gfx8.streamout_state, so) { + so.RenderingDisable = dyn->rs.rasterizer_discard_enable; + so.RenderStreamSelect = dyn->rs.rasterization_stream; +#if INTEL_NEEDS_WA_18022508906 + /* Wa_18022508906 : + * + * SKL PRMs, Volume 7: 3D-Media-GPGPU, Stream Output Logic (SOL) Stage: + * + * SOL_INT::Render_Enable = + * (3DSTATE_STREAMOUT::Force_Rending == Force_On) || + * ( + * (3DSTATE_STREAMOUT::Force_Rending != Force_Off) && + * !(3DSTATE_GS::Enable && 3DSTATE_GS::Output Vertex Size == 0) && + * !3DSTATE_STREAMOUT::API_Render_Disable && + * ( + * 3DSTATE_DEPTH_STENCIL_STATE::Stencil_TestEnable || + * 3DSTATE_DEPTH_STENCIL_STATE::Depth_TestEnable || + * 3DSTATE_DEPTH_STENCIL_STATE::Depth_WriteEnable || + * 3DSTATE_PS_EXTRA::PS_Valid || + * 3DSTATE_WM::Legacy Depth_Buffer_Clear || + * 3DSTATE_WM::Legacy Depth_Buffer_Resolve_Enable || + * 3DSTATE_WM::Legacy Hierarchical_Depth_Buffer_Resolve_Enable + * ) + * ) + * + * If SOL_INT::Render_Enable is false, the SO stage will not forward any + * topologies down the pipeline. Which is not what we want for occlusion + * queries. + * + * Here we force rendering to get SOL_INT::Render_Enable when occlusion + * queries are active. + */ + if (!so.RenderingDisable && cmd_buffer->state.gfx.n_occlusion_queries > 0) + so.ForceRendering = Force_on; +#endif + + switch (dyn->rs.provoking_vertex) { + case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT: + so.ReorderMode = LEADING; + break; + + case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT: + so.ReorderMode = TRAILING; + break; + + default: + unreachable("Invalid provoking vertex mode"); + } + } +} + +static void +cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_instance *instance = cmd_buffer->device->physical->instance; + struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx; + const struct vk_dynamic_graphics_state *dyn = + &cmd_buffer->vk.dynamic_graphics_state; + uint32_t count = dyn->vp.viewport_count; + const VkViewport *viewports = dyn->vp.viewports; + struct anv_state sf_clip_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64); + + const float scale = dyn->vp.depth_clip_negative_one_to_one ? 0.5f : 1.0f; + + for (uint32_t i = 0; i < count; i++) { + const VkViewport *vp = &viewports[i]; + + /* The gfx7 state struct has just the matrix and guardband fields, the + * gfx8 struct adds the min/max viewport fields. */ + struct GENX(SF_CLIP_VIEWPORT) sfv = { + .ViewportMatrixElementm00 = vp->width / 2, + .ViewportMatrixElementm11 = vp->height / 2, + .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) * scale, + .ViewportMatrixElementm30 = vp->x + vp->width / 2, + .ViewportMatrixElementm31 = vp->y + vp->height / 2, + .ViewportMatrixElementm32 = dyn->vp.depth_clip_negative_one_to_one ? + (vp->minDepth + vp->maxDepth) * scale : vp->minDepth, + .XMinClipGuardband = -1.0f, + .XMaxClipGuardband = 1.0f, + .YMinClipGuardband = -1.0f, + .YMaxClipGuardband = 1.0f, + .XMinViewPort = vp->x, + .XMaxViewPort = vp->x + vp->width - 1, + .YMinViewPort = MIN2(vp->y, vp->y + vp->height), + .YMaxViewPort = MAX2(vp->y, vp->y + vp->height) - 1, + }; + + /* Fix depth test misrenderings by lowering translated depth range */ + if (instance->lower_depth_range_rate != 1.0f) + sfv.ViewportMatrixElementm32 *= instance->lower_depth_range_rate; + + const uint32_t fb_size_max = 1 << 14; + uint32_t x_min = 0, x_max = fb_size_max; + uint32_t y_min = 0, y_max = fb_size_max; + + /* If we have a valid renderArea, include that */ + if (gfx->render_area.extent.width > 0 && + gfx->render_area.extent.height > 0) { + x_min = MAX2(x_min, gfx->render_area.offset.x); + x_max = MIN2(x_max, gfx->render_area.offset.x + + gfx->render_area.extent.width); + y_min = MAX2(y_min, gfx->render_area.offset.y); + y_max = MIN2(y_max, gfx->render_area.offset.y + + gfx->render_area.extent.height); + } + + /* The client is required to have enough scissors for whatever it sets + * as ViewportIndex but it's possible that they've got more viewports + * set from a previous command. Also, from the Vulkan 1.3.207: + * + * "The application must ensure (using scissor if necessary) that + * all rendering is contained within the render area." + * + * If the client doesn't set a scissor, that basically means it + * guarantees everything is in-bounds already. If we end up using a + * guardband of [-1, 1] in that case, there shouldn't be much loss. + * It's theoretically possible that they could do all their clipping + * with clip planes but that'd be a bit odd. + */ + if (i < dyn->vp.scissor_count) { + const VkRect2D *scissor = &dyn->vp.scissors[i]; + x_min = MAX2(x_min, scissor->offset.x); + x_max = MIN2(x_max, scissor->offset.x + scissor->extent.width); + y_min = MAX2(y_min, scissor->offset.y); + y_max = MIN2(y_max, scissor->offset.y + scissor->extent.height); + } + + /* Only bother calculating the guardband if our known render area is + * less than the maximum size. Otherwise, it will calculate [-1, 1] + * anyway but possibly with precision loss. + */ + if (x_min > 0 || x_max < fb_size_max || + y_min > 0 || y_max < fb_size_max) { + intel_calculate_guardband_size(x_min, x_max, y_min, y_max, + sfv.ViewportMatrixElementm00, + sfv.ViewportMatrixElementm11, + sfv.ViewportMatrixElementm30, + sfv.ViewportMatrixElementm31, + &sfv.XMinClipGuardband, + &sfv.XMaxClipGuardband, + &sfv.YMinClipGuardband, + &sfv.YMaxClipGuardband); + } + + GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, &sfv); + } + + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) { + clip.SFClipViewportPointer = sf_clip_state.offset; + } +} + +static void +cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer) +{ + const struct vk_dynamic_graphics_state *dyn = + &cmd_buffer->vk.dynamic_graphics_state; + uint32_t count = dyn->vp.viewport_count; + const VkViewport *viewports = dyn->vp.viewports; + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); + + for (uint32_t i = 0; i < count; i++) { + const VkViewport *vp = &viewports[i]; + + /* From the Vulkan spec: + * + * "It is valid for minDepth to be greater than or equal to + * maxDepth." + */ + float min_depth = MIN2(vp->minDepth, vp->maxDepth); + float max_depth = MAX2(vp->minDepth, vp->maxDepth); + + struct GENX(CC_VIEWPORT) cc_viewport = { + .MinimumDepth = dyn->rs.depth_clamp_enable ? min_depth : 0.0f, + .MaximumDepth = dyn->rs.depth_clamp_enable ? max_depth : 1.0f, + }; + + GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport); + } + + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) { + cc.CCViewportPointer = cc_state.offset; + } +} + +static void +cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx; + const struct vk_dynamic_graphics_state *dyn = + &cmd_buffer->vk.dynamic_graphics_state; + uint32_t count = dyn->vp.scissor_count; + const VkRect2D *scissors = dyn->vp.scissors; + const VkViewport *viewports = dyn->vp.viewports; + + /* Wa_1409725701: + * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is + * stored as an array of up to 16 elements. The location of first + * element of the array, as specified by Pointer to SCISSOR_RECT, should + * be aligned to a 64-byte boundary. + */ + uint32_t alignment = 64; + struct anv_state scissor_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, alignment); + + for (uint32_t i = 0; i < count; i++) { + const VkRect2D *s = &scissors[i]; + const VkViewport *vp = &viewports[i]; + + /* Since xmax and ymax are inclusive, we have to have xmax < xmin or + * ymax < ymin for empty clips. In case clip x, y, width height are all + * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't + * what we want. Just special case empty clips and produce a canonical + * empty clip. */ + static const struct GENX(SCISSOR_RECT) empty_scissor = { + .ScissorRectangleYMin = 1, + .ScissorRectangleXMin = 1, + .ScissorRectangleYMax = 0, + .ScissorRectangleXMax = 0 + }; + + const int max = 0xffff; + + uint32_t y_min = MAX2(s->offset.y, MIN2(vp->y, vp->y + vp->height)); + uint32_t x_min = MAX2(s->offset.x, vp->x); + int64_t y_max = MIN2(s->offset.y + s->extent.height - 1, + MAX2(vp->y, vp->y + vp->height) - 1); + int64_t x_max = MIN2(s->offset.x + s->extent.width - 1, + vp->x + vp->width - 1); + + y_max = CLAMP(y_max, 0, INT16_MAX >> 1); + x_max = CLAMP(x_max, 0, INT16_MAX >> 1); + + /* Do this math using int64_t so overflow gets clamped correctly. */ + if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { + y_min = CLAMP((uint64_t) y_min, gfx->render_area.offset.y, max); + x_min = CLAMP((uint64_t) x_min, gfx->render_area.offset.x, max); + y_max = CLAMP((uint64_t) y_max, 0, + gfx->render_area.offset.y + + gfx->render_area.extent.height - 1); + x_max = CLAMP((uint64_t) x_max, 0, + gfx->render_area.offset.x + + gfx->render_area.extent.width - 1); + } + + const struct GENX(SCISSOR_RECT) scissor = { + .ScissorRectangleYMin = y_min, + .ScissorRectangleXMin = x_min, + .ScissorRectangleYMax = y_max, + .ScissorRectangleXMax = x_max + }; + + if (s->extent.width <= 0 || s->extent.height <= 0) { + GENX(SCISSOR_RECT_pack)(NULL, scissor_state.map + i * 8, + &empty_scissor); + } else { + GENX(SCISSOR_RECT_pack)(NULL, scissor_state.map + i * 8, &scissor); + } + } + + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_SCISSOR_STATE_POINTERS), ssp) { + ssp.ScissorRectPointer = scissor_state.offset; + } +} + void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) { @@ -495,6 +852,41 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) struct vk_dynamic_graphics_state *dyn = &cmd_buffer->vk.dynamic_graphics_state; + cmd_buffer_emit_clip(cmd_buffer); + + if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_XFB_ENABLE)) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZATION_STREAM) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX)) + cmd_buffer_emit_streamout(cmd_buffer); + + if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_RENDER_TARGETS)) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE)) { + cmd_buffer_emit_viewport(cmd_buffer); + cmd_buffer_emit_depth_viewport(cmd_buffer); + cmd_buffer_emit_scissor(cmd_buffer); + } + + if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY)) { + uint32_t topology; + if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) + topology = _3DPRIM_PATCHLIST(dyn->ts.patch_control_points); + else + topology = genX(vk_to_intel_primitive_type)[dyn->ia.primitive_topology]; + + cmd_buffer->state.gfx.primitive_topology = topology; + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) { + vft.PrimitiveTopologyType = topology; + } + } + if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI)) { const uint32_t ve_count = -- 2.7.4