intel: Move the D16 workarounds out of ISL
author Nanley Chery <nanley.g.chery@intel.com>
Tue, 15 Jun 2021 17:38:38 +0000 (10:38 -0700)
committer Marge Bot <eric+marge@anholt.net>
Fri, 20 Aug 2021 17:50:35 +0000 (17:50 +0000)
Implement the workarounds in anv and iris instead.

Before this commit, ISL unconditionally modified workaround registers
while filling out depth stencil state. To account for this, drivers
unconditionally stalled prior to emitting depth stencil packets. This
hurt performance.

By having the drivers perform the workarounds, they can choose when to
modify the relevant registers. The drivers now avoid emitting the
workaround for NULL depth buffers. This reduces stalls and leads to
better performance.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> (the ISL/Anv bits)
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> (the Iris bits)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11454>

src/gallium/drivers/iris/iris_blorp.c
src/gallium/drivers/iris/iris_state.c
src/intel/isl/isl.c
src/intel/isl/isl_emit_depth_stencil.c
src/intel/vulkan/genX_blorp_exec.c
src/intel/vulkan/genX_cmd_buffer.c

index ec8fa81a54f98de526e7eb225781f29fa506da87..521f87a9c177a36e2777cfce19adc9eba8fc871a 100644 (file)
@@ -274,21 +274,9 @@ iris_blorp_exec(struct blorp_batch *blorp_batch,
                                 PIPE_CONTROL_STALL_AT_SCOREBOARD);
 #endif
 
-#if GFX_VERx10 == 120
-   if (!(blorp_batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL)) {
-      /* Wa_14010455700
-       *
-       * ISL will change some CHICKEN registers depending on the depth surface
-       * format, along with emitting the depth and stencil packets. In that
-       * case, we want to do a depth flush and stall, so the pipeline is not
-       * using these settings while we change the registers.
-       */
-      iris_emit_end_of_pipe_sync(batch,
-                                 "Workaround: Stop pipeline for 14010455700",
-                                 PIPE_CONTROL_DEPTH_STALL |
-                                 PIPE_CONTROL_DEPTH_CACHE_FLUSH);
-   }
-#endif
+   if (params->depth.enabled &&
+       !(blorp_batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL))
+      genX(emit_depth_state_workarounds)(ice, batch, &params->depth.surf);
 
    /* Flush the render cache in cases where the same surface is used with
     * different aux modes, which can lead to GPU hangs.  Invalidation of
index 137817ed40429c2fcef7dae3ad751198a49c25b0..ec7bee062e30ab0c5d03b17b70b1496a39b8b8db 100644 (file)
@@ -1140,8 +1140,7 @@ struct iris_depth_buffer_state {
    uint32_t packets[GENX(3DSTATE_DEPTH_BUFFER_length) +
                     GENX(3DSTATE_STENCIL_BUFFER_length) +
                     GENX(3DSTATE_HIER_DEPTH_BUFFER_length) +
-                    GENX(3DSTATE_CLEAR_PARAMS_length) +
-                    GENX(MI_LOAD_REGISTER_IMM_length) * 2];
+                    GENX(3DSTATE_CLEAR_PARAMS_length)];
 };
 
 /**
@@ -6252,21 +6251,11 @@ iris_upload_dirty_render_state(struct iris_context *ice,
          }
       }
 
-#if GFX_VERx10 == 120
-      /* Wa_14010455700
-       *
-       * ISL will change some CHICKEN registers depending on the depth surface
-       * format, along with emitting the depth and stencil packets. In that
-       * case, we want to do a depth flush and stall, so the pipeline is not
-       * using these settings while we change the registers.
-       */
-      iris_emit_end_of_pipe_sync(batch,
-                                 "Workaround: Stop pipeline for 14010455700",
-                                 PIPE_CONTROL_DEPTH_STALL |
-                                 PIPE_CONTROL_DEPTH_CACHE_FLUSH);
-#endif
-
       iris_batch_emit(batch, cso_z->packets, batch->screen->isl_dev.ds.size);
+
+      if (zres)
+         genX(emit_depth_state_workarounds)(ice, batch, &zres->surf);
+
       if (GFX_VER >= 12) {
          /* Wa_1408224581
           *
index 205e4759fde947a4a4d5ea038c77e6829c397d97..80fdc4d6c7a9b950af85aa5489a6e34dc77eb0ac 100644 (file)
@@ -266,10 +266,6 @@ isl_device_init(struct isl_device *dev,
       dev->ds.hiz_offset = 0;
    }
 
-   if (ISL_GFX_VERX10(dev) == 120) {
-      dev->ds.size += GFX12_MI_LOAD_REGISTER_IMM_length * 4 * 2;
-   }
-
    isl_device_setup_mocs(dev);
 }
 
index c337205bc710e1d4c54b397baaddb941489cf594..e1a367f21b83bb6de81371287c1ae078ba2cfeec 100644 (file)
@@ -304,50 +304,6 @@ isl_genX(emit_depth_stencil_hiz_s)(const struct isl_device *dev, void *batch,
    GENX(3DSTATE_HIER_DEPTH_BUFFER_pack)(NULL, dw, &hiz);
    dw += GENX(3DSTATE_HIER_DEPTH_BUFFER_length);
 
-#if GFX_VERx10 == 120
-   /* Wa_14010455700
-    *
-    * To avoid sporadic corruptions “Set 0x7010[9] when Depth Buffer Surface
-    * Format is D16_UNORM , surface type is not NULL & 1X_MSAA”.
-    */
-   bool enable_14010455700 =
-      info->depth_surf && info->depth_surf->samples == 1 &&
-      db.SurfaceType != SURFTYPE_NULL && db.SurfaceFormat == D16_UNORM;
-   struct GENX(COMMON_SLICE_CHICKEN1) chicken1 = {
-      .HIZPlaneOptimizationdisablebit = enable_14010455700,
-      .HIZPlaneOptimizationdisablebitMask = true,
-   };
-   uint32_t chicken1_dw;
-   GENX(COMMON_SLICE_CHICKEN1_pack)(NULL, &chicken1_dw, &chicken1);
-
-   struct GENX(MI_LOAD_REGISTER_IMM) lri = {
-      GENX(MI_LOAD_REGISTER_IMM_header),
-      .RegisterOffset = GENX(COMMON_SLICE_CHICKEN1_num),
-      .DataDWord = chicken1_dw,
-   };
-   GENX(MI_LOAD_REGISTER_IMM_pack)(NULL, dw, &lri);
-   dw += GENX(MI_LOAD_REGISTER_IMM_length);
-
-   /* Wa_1806527549
-    *
-    * Set HIZ_CHICKEN (7018h) bit 13 = 1 when depth buffer is D16_UNORM.
-    */
-   struct GENX(HIZ_CHICKEN) hiz_chicken = {
-      .HZDepthTestLEGEOptimizationDisable = db.SurfaceFormat == D16_UNORM,
-      .HZDepthTestLEGEOptimizationDisableMask = true,
-   };
-   uint32_t hiz_chicken_dw;
-   GENX(HIZ_CHICKEN_pack)(NULL, &hiz_chicken_dw, &hiz_chicken);
-
-   struct GENX(MI_LOAD_REGISTER_IMM) lri2 = {
-      GENX(MI_LOAD_REGISTER_IMM_header),
-      .RegisterOffset = GENX(HIZ_CHICKEN_num),
-      .DataDWord = hiz_chicken_dw,
-   };
-   GENX(MI_LOAD_REGISTER_IMM_pack)(NULL, dw, &lri2);
-   dw += GENX(MI_LOAD_REGISTER_IMM_length);
-#endif
-
    GENX(3DSTATE_CLEAR_PARAMS_pack)(NULL, dw, &clear);
    dw += GENX(3DSTATE_CLEAR_PARAMS_length);
 #endif
index 9ca1fb66284c4cd437957c5901b5524f12efec3e..ced154f72e125a17dea47f54196d2b16a31ee56a 100644 (file)
@@ -264,21 +264,9 @@ genX(blorp_exec)(struct blorp_batch *batch,
                              "before blorp BTI change");
 #endif
 
-#if GFX_VERx10 == 120
-   if (!(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL)) {
-      /* Wa_14010455700
-       *
-       * ISL will change some CHICKEN registers depending on the depth surface
-       * format, along with emitting the depth and stencil packets. In that
-       * case, we want to do a depth flush and stall, so the pipeline is not
-       * using these settings while we change the registers.
-       */
-      cmd_buffer->state.pending_pipe_bits |=
-         ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
-         ANV_PIPE_DEPTH_STALL_BIT |
-         ANV_PIPE_END_OF_PIPE_SYNC_BIT;
-   }
-#endif
+   if (params->depth.enabled &&
+       !(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL))
+      genX(cmd_buffer_emit_gfx12_depth_wa)(cmd_buffer, &params->depth.surf);
 
 #if GFX_VER == 7
    /* The MI_LOAD/STORE_REGISTER_MEM commands which BLORP uses to implement
index cbf0b12f05441026e8d68259664347bcf95980c3..5c287f836edfd14f01f39e06d935b12c4f86f34a 100644 (file)
@@ -5855,6 +5855,9 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
 
    isl_emit_depth_stencil_hiz_s(&device->isl_dev, dw, &info);
 
+   if (info.depth_surf)
+      genX(cmd_buffer_emit_gfx12_depth_wa)(cmd_buffer, info.depth_surf);
+
    if (GFX_VER >= 12) {
       cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
       genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
@@ -6293,22 +6296,6 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
                              "change RT");
 #endif
 
-#if GFX_VERx10 == 120
-   /* Wa_14010455700
-    *
-    * ISL will change some CHICKEN registers depending on the depth surface
-    * format, along with emitting the depth and stencil packets. In that case,
-    * we want to do a depth flush and stall, so the pipeline is not using these
-    * settings while we change the registers.
-    */
-   anv_add_pending_pipe_bits(cmd_buffer,
-                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
-                             ANV_PIPE_DEPTH_STALL_BIT |
-                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
-                             "change DS");
-   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
-#endif
-
    cmd_buffer_emit_depth_stencil(cmd_buffer);
 }