turnip: Use the simplified stencil write flags for the LRZ-allowed check.
author: Emma Anholt <emma@anholt.net>
Mon, 19 Sep 2022 21:58:36 +0000 (14:58 -0700)
committer: Marge Bot <emma+marge@anholt.net>
Wed, 21 Sep 2022 17:18:07 +0000 (17:18 +0000)
Traces of GLES games captured by ANGLE frequently contain no-op stencil
writes, which ANGLE and Zink both pass straight through.  Given that we
support dynamic stencil state updates via tu_CmdSetStencil*(), draw time
unfortunately really is the time to decide this state.

Reuse the fancier stencil write-enable check from "can we do early z?" in
"can we do LRZ?".  This lets one set of draws in among_us use LRZ, but
I don't see a detectable performance difference.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18691>

src/freedreno/vulkan/tu_cmd_buffer.c
src/freedreno/vulkan/tu_cmd_buffer.h
src/freedreno/vulkan/tu_lrz.c

index e544cee..ed9d556 100644 (file)
@@ -4208,26 +4208,22 @@ tu6_emit_consts(struct tu_cmd_buffer *cmd,
    return tu_cs_end_draw_state(&cmd->sub_cs, &cs);
 }
 
-static bool
-tu6_writes_depth(struct tu_cmd_buffer *cmd, bool depth_test_enable)
-{
-   bool depth_write_enable =
-      cmd->state.rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
-
-   VkCompareOp depth_compare_op =
-      (cmd->state.rb_depth_cntl & A6XX_RB_DEPTH_CNTL_ZFUNC__MASK) >> A6XX_RB_DEPTH_CNTL_ZFUNC__SHIFT;
-
-   bool depth_compare_op_writes = depth_compare_op != VK_COMPARE_OP_NEVER;
-
-   return depth_test_enable && depth_write_enable && depth_compare_op_writes;
-}
-
-static bool
-tu6_writes_stencil(struct tu_cmd_buffer *cmd)
+/* Various frontends (ANGLE, zink at least) will enable stencil testing with
+ * what works out to be no-op writes.  Simplify what they give us into flags
+ * that LRZ can use.
+ */
+static void
+tu6_update_simplified_stencil_state(struct tu_cmd_buffer *cmd)
 {
    bool stencil_test_enable =
       cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE;
 
+   if (!stencil_test_enable) {
+      cmd->state.stencil_front_write = false;
+      cmd->state.stencil_back_write = false;
+      return;
+   }
+
    bool stencil_front_writemask =
       (cmd->state.pipeline->dynamic_state_mask & BIT(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) ?
       (cmd->state.dynamic_stencil_wrmask & 0xff) :
@@ -4261,9 +4257,30 @@ tu6_writes_stencil(struct tu_cmd_buffer *cmd)
       back_fail_op != VK_STENCIL_OP_KEEP ||
       back_depth_fail_op != VK_STENCIL_OP_KEEP;
 
-   return stencil_test_enable &&
-      ((stencil_front_writemask && stencil_front_op_writes) ||
-       (stencil_back_writemask && stencil_back_op_writes));
+   cmd->state.stencil_front_write =
+      stencil_front_op_writes && stencil_front_writemask;
+   cmd->state.stencil_back_write =
+      stencil_back_op_writes && stencil_back_writemask;
+}
+
+static bool
+tu6_writes_depth(struct tu_cmd_buffer *cmd, bool depth_test_enable)
+{
+   bool depth_write_enable =
+      cmd->state.rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
+
+   VkCompareOp depth_compare_op =
+      (cmd->state.rb_depth_cntl & A6XX_RB_DEPTH_CNTL_ZFUNC__MASK) >> A6XX_RB_DEPTH_CNTL_ZFUNC__SHIFT;
+
+   bool depth_compare_op_writes = depth_compare_op != VK_COMPARE_OP_NEVER;
+
+   return depth_test_enable && depth_write_enable && depth_compare_op_writes;
+}
+
+static bool
+tu6_writes_stencil(struct tu_cmd_buffer *cmd)
+{
+   return cmd->state.stencil_front_write || cmd->state.stencil_back_write;
 }
 
 static void
@@ -4394,6 +4411,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
 
       cmd->state.lrz_and_depth_plane_state =
          tu_cs_draw_state(&cmd->sub_cs, &cs, size);
+      tu6_update_simplified_stencil_state(cmd);
       tu6_emit_lrz(cmd, &cs);
       tu6_build_depth_plane_z_mode(cmd, &cs);
    }
index ef679e3..ae359ce 100644 (file)
@@ -325,6 +325,8 @@ struct tu_cmd_state
    uint32_t dynamic_stencil_mask;
    uint32_t dynamic_stencil_wrmask;
    uint32_t dynamic_stencil_ref;
+   bool stencil_front_write;
+   bool stencil_back_write;
 
    uint32_t gras_su_cntl, rb_depth_cntl, rb_stencil_cntl;
    uint32_t pc_raster_cntl, vpc_unknown_9107;
index e6cdced..18a5d15 100644 (file)
@@ -802,16 +802,6 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd,
    /* Invalidate LRZ and disable write if stencil test is enabled */
    bool stencil_test_enable = cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE;
    if (!disable_lrz && stencil_test_enable) {
-      bool stencil_front_writemask =
-         (pipeline->dynamic_state_mask & BIT(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) ?
-         (cmd->state.dynamic_stencil_wrmask & 0xff) :
-         (pipeline->ds.stencil_wrmask & 0xff);
-
-      bool stencil_back_writemask =
-         (pipeline->dynamic_state_mask & BIT(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) ?
-         ((cmd->state.dynamic_stencil_wrmask & 0xff00) >> 8) :
-         (pipeline->ds.stencil_wrmask & 0xff00) >> 8;
-
       VkCompareOp stencil_front_compare_op =
          (cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_FUNC__MASK) >> A6XX_RB_STENCIL_CONTROL_FUNC__SHIFT;
 
@@ -821,11 +811,11 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd,
       bool lrz_allowed = true;
       lrz_allowed = lrz_allowed && tu6_stencil_op_lrz_allowed(
                                       &gras_lrz_cntl, stencil_front_compare_op,
-                                      stencil_front_writemask);
+                                      cmd->state.stencil_front_write);
 
       lrz_allowed = lrz_allowed && tu6_stencil_op_lrz_allowed(
                                       &gras_lrz_cntl, stencil_back_compare_op,
-                                      stencil_back_writemask);
+                                      cmd->state.stencil_back_write);
 
       /* Without depth write it's enough to make sure that depth test
        * is executed after stencil test, so temporary disabling LRZ is enough.