turnip: apply workaround for depth bounds test without depth test
author: Danylo Piliaiev <dpiliaiev@igalia.com>
Tue, 17 Aug 2021 11:59:56 +0000 (14:59 +0300)
committer: Marge Bot <eric+marge@anholt.net>
Thu, 19 Aug 2021 10:25:58 +0000 (10:25 +0000)
On some GPUs when:
- depth bounds test is enabled
- depth test is disabled
- depth attachment uses UBWC in sysmem mode
the GPU hangs. As a workaround we should enable the z test, which is what the
blob does for a630. Since the z test is then enabled, we must make it always pass.

Blob doesn't emit this workaround on a650 and a660. Untested on a640.

Fixes:
 dEQP-VK.pipeline.extended_dynamic_state.two_draws_static.depth_bounds_test_disable
 dEQP-VK.pipeline.extended_dynamic_state.two_draws_dynamic.depth_bounds_test_disable
 dEQP-VK.dynamic_state.ds_state.depth_bounds_1

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12407>

src/freedreno/common/freedreno_dev_info.h
src/freedreno/common/freedreno_devices.py
src/freedreno/vulkan/tu_cmd_buffer.c
src/freedreno/vulkan/tu_pipeline.c
src/freedreno/vulkan/tu_private.h

index 5bec6b0..d005bcd 100644 (file)
@@ -86,6 +86,11 @@ struct fd_dev_info {
           */
          bool indirect_draw_wfm_quirk;
 
+         /* On some GPUs, the depth test needs to be enabled when the
+          * depth bounds test is enabled and the depth attachment uses UBWC.
+          */
+         bool depth_bounds_require_depth_test_quirk;
+
          bool has_tex_filter_cubic;
 
          bool has_sample_locations;
index e02e4a0..e1eca08 100644 (file)
@@ -200,6 +200,7 @@ a6xx_gen1 = dict(
         reg_size_vec4 = 96,
         ccu_cntl_gmem_unk2 = True,
         indirect_draw_wfm_quirk = True,
+        depth_bounds_require_depth_test_quirk = True,
     )
 
 # a640, a680:
@@ -209,6 +210,7 @@ a6xx_gen2 = dict(
         supports_multiview_mask = True,
         has_z24uint_s8uint = True,
         indirect_draw_wfm_quirk = True,
+        depth_bounds_require_depth_test_quirk = True, # TODO: check if true
     )
 
 # a650:
index 5903dad..e8801b2 100644 (file)
@@ -471,6 +471,24 @@ tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1)
                    A6XX_SP_TP_WINDOW_OFFSET(.x = x1, .y = y1));
 }
 
+/* Apply the "depth bounds test without depth test" hardware workaround.
+ *
+ * device:        device whose a6xx info carries the
+ *                depth_bounds_require_depth_test_quirk flag.
+ * rb_depth_cntl: in/out RB_DEPTH_CNTL value; on affected GPUs the z test
+ *                enable bit and ZFUNC(FUNC_ALWAYS) are OR'ed into it.
+ *
+ * Does nothing on GPUs that do not have the quirk set.
+ */
+void
+tu6_apply_depth_bounds_workaround(struct tu_device *device,
+                                  uint32_t *rb_depth_cntl)
+{
+   if (!device->physical_device->info->a6xx.depth_bounds_require_depth_test_quirk)
+      return;
+
+   /* On some GPUs it is necessary to enable z test for depth bounds test when
+    * UBWC is enabled. Otherwise, the GPU would hang. FUNC_ALWAYS is required to
+    * pass z test. Relevant tests:
+    *  dEQP-VK.pipeline.extended_dynamic_state.two_draws_dynamic.depth_bounds_test_disable
+    *  dEQP-VK.dynamic_state.ds_state.depth_bounds_1
+    */
+   *rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE |
+                     A6XX_RB_DEPTH_CNTL_ZFUNC(FUNC_ALWAYS);
+}
+
 static void
 tu_cs_emit_draw_state(struct tu_cs *cs, uint32_t id, struct tu_draw_state state)
 {
@@ -3743,6 +3761,10 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
           (rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE))
          rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE;
 
+      if ((rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE) &&
+          !(rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE))
+         tu6_apply_depth_bounds_workaround(cmd->device, &rb_depth_cntl);
+
       if (pipeline->rb_depth_cntl_disable)
          rb_depth_cntl = 0;
 
index 12a9e3e..fc4afd9 100644 (file)
@@ -2806,6 +2806,9 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
 
       if (ds_info->depthBoundsTestEnable)
          rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE | A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE;
+
+      if (ds_info->depthBoundsTestEnable && !ds_info->depthTestEnable)
+         tu6_apply_depth_bounds_workaround(builder->device, &rb_depth_cntl);
    } else {
       /* if RB_DEPTH_CNTL is set dynamically, we need to make sure it is set
        * to 0 when this pipeline is used, as enabling depth test when there
index d87d64a..9ae3d16 100644 (file)
@@ -1250,6 +1250,9 @@ void tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1);
 
 void tu_disable_draw_states(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
 
+void tu6_apply_depth_bounds_workaround(struct tu_device *device,
+                                       uint32_t *rb_depth_cntl);
+
 struct tu_pvtmem_config {
    uint64_t iova;
    uint32_t per_fiber_size;