From 9ecc26ff276099ed2eb3fe22d417c84f49700d20 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Jul 2022 21:02:35 -0500 Subject: [PATCH] anv: Do depth/stencil optimization for dynamic depth/stencil When we made depth/stencil dynamic, we lost the optimization. This is particularly important for cases where the stencil test is enabled but never writes anything as certain combinations with discard can cause the stencil write (which doesn't do anything) to get moved late which can be a measurable perf hit. According to 028e1137e67b ("anv/pipeline: Be smarter about depth/stencil state"), it was a couple percent for DOTA2 on Broadwell back in the day. No idea how it affects current titles. This may also improve the depth/stencil PMA workarounds on Gen8 and Gen9 since they're now looking at optimized depth/stencil state. Reviewed-by: Lionel Landwerlin Part-of: --- src/intel/vulkan/anv_private.h | 4 -- src/intel/vulkan/genX_pipeline.c | 33 --------- src/intel/vulkan/gfx7_cmd_buffer.c | 52 +++++++------- src/intel/vulkan/gfx8_cmd_buffer.c | 144 ++++++++++++++++++------------------- 4 files changed, 100 insertions(+), 133 deletions(-) diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 5babcf1..a00ac62 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -3106,10 +3106,6 @@ struct anv_graphics_pipeline { uint32_t view_mask; uint32_t instance_multiplier; - bool writes_depth; - bool depth_test_enable; - bool writes_stencil; - bool stencil_test_enable; bool depth_clamp_enable; bool depth_clip_enable; bool kill_pixel; diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 5fb8054..2ab9b2c 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -974,38 +974,6 @@ const uint32_t genX(vk_to_intel_primitive_type)[] = { [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, }; -static void -emit_ds_state(struct 
anv_graphics_pipeline *pipeline, - const struct vk_depth_stencil_state *ds_in, - const struct vk_render_pass_state *rp) -{ - if (ds_in == NULL) { - /* We're going to OR this together with the dynamic state. We need - * to make sure it's initialized to something useful. - */ - pipeline->writes_stencil = false; - pipeline->stencil_test_enable = false; - pipeline->writes_depth = false; - pipeline->depth_test_enable = false; - return; - } - - VkImageAspectFlags ds_aspects = 0; - if (rp != NULL) { - if (rp->depth_attachment_format != VK_FORMAT_UNDEFINED) - ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; - if (rp->stencil_attachment_format != VK_FORMAT_UNDEFINED) - ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; - } - - struct vk_depth_stencil_state ds = *ds_in; - vk_optimize_depth_stencil_state(&ds, ds_aspects, false); - pipeline->writes_stencil = ds.stencil.write_enable; - pipeline->stencil_test_enable = ds.stencil.test_enable; - pipeline->writes_depth = ds.depth.write_enable; - pipeline->depth_test_enable = ds.depth.test_enable; -} - static bool is_dual_src_blend_factor(VkBlendFactor factor) { @@ -2336,7 +2304,6 @@ genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline, emit_rs_state(pipeline, state->ia, state->rs, state->ms, state->rp, urb_deref_block_size); emit_ms_state(pipeline, state->ms); - emit_ds_state(pipeline, state->ds, state->rp); emit_cb_state(pipeline, state->cb, state->ms); compute_kill_pixel(pipeline, state->ms, state->rp); diff --git a/src/intel/vulkan/gfx7_cmd_buffer.c b/src/intel/vulkan/gfx7_cmd_buffer.c index 32bf964..5522179 100644 --- a/src/intel/vulkan/gfx7_cmd_buffer.c +++ b/src/intel/vulkan/gfx7_cmd_buffer.c @@ -145,33 +145,37 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK)) { uint32_t depth_stencil_dw[GENX(DEPTH_STENCIL_STATE_length)]; + VkImageAspectFlags ds_aspects = 0; + if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED) + 
ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; + if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED) + ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; + + struct vk_depth_stencil_state opt_ds = dyn->ds; + vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true); + struct GENX(DEPTH_STENCIL_STATE) depth_stencil = { .DoubleSidedStencilEnable = true, - .StencilTestMask = dyn->ds.stencil.front.compare_mask & 0xff, - .StencilWriteMask = dyn->ds.stencil.front.write_mask & 0xff, - - .BackfaceStencilTestMask = dyn->ds.stencil.back.compare_mask & 0xff, - .BackfaceStencilWriteMask = dyn->ds.stencil.back.write_mask & 0xff, - - .StencilBufferWriteEnable = - (dyn->ds.stencil.back.write_mask || - dyn->ds.stencil.front.write_mask) && - dyn->ds.stencil.test_enable, - - .DepthTestEnable = dyn->ds.depth.test_enable, - .DepthBufferWriteEnable = dyn->ds.depth.test_enable && - dyn->ds.depth.write_enable, - .DepthTestFunction = genX(vk_to_intel_compare_op)[dyn->ds.depth.compare_op], - .StencilTestEnable = dyn->ds.stencil.test_enable, - .StencilFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.front.op.fail], - .StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.front.op.pass], - .StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.front.op.depth_fail], - .StencilTestFunction = genX(vk_to_intel_compare_op)[dyn->ds.stencil.front.op.compare], - .BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.back.op.fail], - .BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.back.op.pass], - .BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.back.op.depth_fail], - .BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[dyn->ds.stencil.back.op.compare], + .StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff, + .StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff, + + .BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff, + .BackfaceStencilWriteMask = 
opt_ds.stencil.back.write_mask & 0xff, + + .DepthTestEnable = opt_ds.depth.test_enable, + .DepthBufferWriteEnable = opt_ds.depth.write_enable, + .DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op], + .StencilTestEnable = opt_ds.stencil.test_enable, + .StencilBufferWriteEnable = opt_ds.stencil.write_enable, + .StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail], + .StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass], + .StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail], + .StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare], + .BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail], + .BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass], + .BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail], + .BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare], }; GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil); diff --git a/src/intel/vulkan/gfx8_cmd_buffer.c b/src/intel/vulkan/gfx8_cmd_buffer.c index d2ef881..131c1e4 100644 --- a/src/intel/vulkan/gfx8_cmd_buffer.c +++ b/src/intel/vulkan/gfx8_cmd_buffer.c @@ -107,7 +107,8 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable) } UNUSED static bool -want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer) +want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer, + const struct vk_depth_stencil_state *ds) { assert(GFX_VER == 8); @@ -174,7 +175,7 @@ want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer) */ /* 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable */ - if (!pipeline->depth_test_enable) + if (!ds->depth.test_enable) return false; /* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels || @@ -190,17 +191,15 @@ want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer) * 
3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) || * (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF)) */ - return (pipeline->kill_pixel && (pipeline->writes_depth || - pipeline->writes_stencil)) || + return (pipeline->kill_pixel && (ds->depth.write_enable || + ds->stencil.write_enable)) || wm_prog_data->computed_depth_mode != PSCDEPTH_OFF; } UNUSED static bool -want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer) +want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer, + const struct vk_depth_stencil_state *ds) { - const struct vk_dynamic_graphics_state *dyn = - &cmd_buffer->vk.dynamic_graphics_state; - if (GFX_VER > 9) return false; assert(GFX_VER == 9); @@ -286,19 +285,13 @@ want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer) /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE && * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable */ - const bool stc_test_en = - cmd_buffer->state.gfx.stencil_att.iview != NULL && - pipeline->stencil_test_enable; + const bool stc_test_en = ds->stencil.test_enable; /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE && * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable && * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE) */ - const bool stc_write_en = - cmd_buffer->state.gfx.stencil_att.iview != NULL && - (dyn->ds.stencil.front.write_mask || - dyn->ds.stencil.back.write_mask) && - pipeline->writes_stencil; + const bool stc_write_en = ds->stencil.write_enable; /* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */ const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil; @@ -433,37 +426,41 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK)) { + VkImageAspectFlags ds_aspects = 0; + if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED) + ds_aspects |= 
VK_IMAGE_ASPECT_DEPTH_BIT; + if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED) + ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; + + struct vk_depth_stencil_state opt_ds = dyn->ds; + vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true); + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) { ds.DoubleSidedStencilEnable = true; - ds.StencilTestMask = dyn->ds.stencil.front.compare_mask & 0xff; - ds.StencilWriteMask = dyn->ds.stencil.front.write_mask & 0xff; - - ds.BackfaceStencilTestMask = dyn->ds.stencil.back.compare_mask & 0xff; - ds.BackfaceStencilWriteMask = dyn->ds.stencil.back.write_mask & 0xff; - - ds.StencilBufferWriteEnable = - (dyn->ds.stencil.front.write_mask || - dyn->ds.stencil.back.write_mask) && - dyn->ds.stencil.test_enable; - - ds.DepthTestEnable = dyn->ds.depth.test_enable; - ds.DepthBufferWriteEnable = dyn->ds.depth.test_enable && - dyn->ds.depth.write_enable; - ds.DepthTestFunction = genX(vk_to_intel_compare_op)[dyn->ds.depth.compare_op]; - ds.StencilTestEnable = dyn->ds.stencil.test_enable; - ds.StencilFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.front.op.fail]; - ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.front.op.pass]; - ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.front.op.depth_fail]; - ds.StencilTestFunction = genX(vk_to_intel_compare_op)[dyn->ds.stencil.front.op.compare]; - ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.back.op.fail]; - ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.back.op.pass]; - ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.back.op.depth_fail]; - ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[dyn->ds.stencil.back.op.compare]; + ds.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff; + ds.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff; + + ds.BackfaceStencilTestMask = 
opt_ds.stencil.back.compare_mask & 0xff; + ds.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff; + + ds.DepthTestEnable = opt_ds.depth.test_enable; + ds.DepthBufferWriteEnable = opt_ds.depth.write_enable; + ds.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op]; + ds.StencilTestEnable = opt_ds.stencil.test_enable; + ds.StencilBufferWriteEnable = opt_ds.stencil.write_enable; + ds.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail]; + ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass]; + ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail]; + ds.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare]; + ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail]; + ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass]; + ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail]; + ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare]; } - genX(cmd_buffer_enable_pma_fix)(cmd_buffer, - want_depth_pma_fix(cmd_buffer)); + const bool pma = want_depth_pma_fix(cmd_buffer, &opt_ds); + genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma); } #else if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) { @@ -495,41 +492,44 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE)) { + VkImageAspectFlags ds_aspects = 0; + if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED) + ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; + if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED) + ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; + + 
struct vk_depth_stencil_state opt_ds = dyn->ds; + vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true); + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) { ds.DoubleSidedStencilEnable = true; - ds.StencilTestMask = dyn->ds.stencil.front.compare_mask & 0xff; - ds.StencilWriteMask = dyn->ds.stencil.front.write_mask & 0xff; - - ds.BackfaceStencilTestMask = dyn->ds.stencil.back.compare_mask & 0xff; - ds.BackfaceStencilWriteMask = dyn->ds.stencil.back.write_mask & 0xff; - - ds.StencilReferenceValue = dyn->ds.stencil.front.reference & 0xff; - ds.BackfaceStencilReferenceValue = dyn->ds.stencil.back.reference & 0xff; - - ds.StencilBufferWriteEnable = - (dyn->ds.stencil.front.write_mask || - dyn->ds.stencil.back.write_mask) && - dyn->ds.stencil.test_enable; - - ds.DepthTestEnable = dyn->ds.depth.test_enable; - ds.DepthBufferWriteEnable = dyn->ds.depth.test_enable && - dyn->ds.depth.write_enable; - ds.DepthTestFunction = genX(vk_to_intel_compare_op)[dyn->ds.depth.compare_op]; - ds.StencilTestEnable = dyn->ds.stencil.test_enable; - ds.StencilFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.front.op.fail]; - ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.front.op.pass]; - ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.front.op.depth_fail]; - ds.StencilTestFunction = genX(vk_to_intel_compare_op)[dyn->ds.stencil.front.op.compare]; - ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.back.op.fail]; - ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.back.op.pass]; - ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.back.op.depth_fail]; - ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[dyn->ds.stencil.back.op.compare]; - + ds.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff; + ds.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff; + + ds.BackfaceStencilTestMask = 
opt_ds.stencil.back.compare_mask & 0xff; + ds.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff; + + ds.StencilReferenceValue = opt_ds.stencil.front.reference & 0xff; + ds.BackfaceStencilReferenceValue = opt_ds.stencil.back.reference & 0xff; + + ds.DepthTestEnable = opt_ds.depth.test_enable; + ds.DepthBufferWriteEnable = opt_ds.depth.write_enable; + ds.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op]; + ds.StencilTestEnable = opt_ds.stencil.test_enable; + ds.StencilBufferWriteEnable = opt_ds.stencil.write_enable; + ds.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail]; + ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass]; + ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail]; + ds.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare]; + ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail]; + ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass]; + ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail]; + ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare]; } - genX(cmd_buffer_enable_pma_fix)(cmd_buffer, - want_stencil_pma_fix(cmd_buffer)); + const bool pma = want_stencil_pma_fix(cmd_buffer, &opt_ds); + genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma); } #endif -- 2.7.4