intel: factor out dispatch PS enabling logic
author Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Mon, 5 Dec 2022 17:26:40 +0000 (19:26 +0200)
committer Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Mon, 5 Dec 2022 22:37:47 +0000 (00:37 +0200)
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Tested-by: Mark Janes <markjanes@swizzler.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20169>

src/gallium/drivers/crocus/crocus_state.c
src/gallium/drivers/iris/iris_state.c
src/intel/blorp/blorp_genX_exec.h
src/intel/compiler/brw_compiler.h
src/intel/compiler/brw_fs.cpp
src/intel/vulkan/genX_pipeline.c
src/intel/vulkan_hasvk/genX_pipeline.c

index 2068561fae518908588c043b028e093572cb8131..749e714c4dfb4dda0abce9214b81202eb2e1494b 100644 (file)
@@ -6446,9 +6446,11 @@ crocus_upload_dirty_render_state(struct crocus_context *ice,
           */
          ps.VectorMaskEnable = GFX_VER >= 8 && wm_prog_data->uses_vmask;
 
-         ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
-         ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
-         ps._32PixelDispatchEnable = wm_prog_data->dispatch_32;
+         brw_fs_get_dispatch_enables(&batch->screen->devinfo, wm_prog_data,
+                                     ice->state.framebuffer.samples,
+                                     &ps._8PixelDispatchEnable,
+                                     &ps._16PixelDispatchEnable,
+                                     &ps._32PixelDispatchEnable);
 
          ps.DispatchGRFStartRegisterForConstantSetupData0 =
             brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
index cb131475a2707643f84b58770a19cab1cb6bd747..8b5ad6cbdbc20a484eb3ac092164c719b37903c7 100644 (file)
@@ -6224,23 +6224,11 @@ iris_upload_dirty_render_state(struct iris_context *ice,
 
             uint32_t ps_state[GENX(3DSTATE_PS_length)] = {0};
             _iris_pack_command(batch, GENX(3DSTATE_PS), ps_state, ps) {
-               ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
-               ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
-               ps._32PixelDispatchEnable = wm_prog_data->dispatch_32;
-
-              /* The docs for 3DSTATE_PS::32 Pixel Dispatch Enable say:
-               *
-               *    "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16,
-               *     SIMD32 Dispatch must not be enabled for PER_PIXEL dispatch
-               *     mode."
-               *
-               * 16x MSAA only exists on Gfx9+, so we can skip this on Gfx8.
-               */
-               if (GFX_VER >= 9 && cso_fb->samples == 16 &&
-                   !wm_prog_data->persample_dispatch) {
-                  assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable);
-                  ps._32PixelDispatchEnable = false;
-               }
+               brw_fs_get_dispatch_enables(&screen->devinfo, wm_prog_data,
+                                           cso_fb->samples,
+                                           &ps._8PixelDispatchEnable,
+                                           &ps._16PixelDispatchEnable,
+                                           &ps._32PixelDispatchEnable);
 
                ps.DispatchGRFStartRegisterForConstantSetupData0 =
                   brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
index 0f68e2c225c6a4557f0a6db890fb9605855c6637..2cb8fb0baeeb6cc6ded1b89ef8de72123b73286d 100644 (file)
@@ -838,6 +838,7 @@ blorp_emit_ps_config(struct blorp_batch *batch,
     */
 
 #if GFX_VER >= 8
+   const struct intel_device_info *devinfo = batch->blorp->compiler->devinfo;
 
    blorp_emit(batch, GENX(3DSTATE_WM), wm);
 
@@ -854,23 +855,11 @@ blorp_emit_ps_config(struct blorp_batch *batch,
          ps.SamplerCount = 0;
 
       if (prog_data) {
-         ps._8PixelDispatchEnable = prog_data->dispatch_8;
-         ps._16PixelDispatchEnable = prog_data->dispatch_16;
-         ps._32PixelDispatchEnable = prog_data->dispatch_32;
-
-         /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable:
-          *
-          *    "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32
-          *    Dispatch must not be enabled for PER_PIXEL dispatch mode."
-          *
-          * Since 16x MSAA is first introduced on SKL, we don't need to apply
-          * the workaround on any older hardware.
-          */
-         if (GFX_VER >= 9 && !prog_data->persample_dispatch &&
-             params->num_samples == 16) {
-            assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable);
-            ps._32PixelDispatchEnable = false;
-         }
+         brw_fs_get_dispatch_enables(devinfo, prog_data,
+                                     params->num_samples,
+                                     &ps._8PixelDispatchEnable,
+                                     &ps._16PixelDispatchEnable,
+                                     &ps._32PixelDispatchEnable);
 
          ps.DispatchGRFStartRegisterForConstantSetupData0 =
             brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
@@ -894,7 +883,6 @@ blorp_emit_ps_config(struct blorp_batch *batch,
        *
        * In Gfx8 the format is U8-2 whereas in Gfx9+ it is U9-1.
        */
-      const struct intel_device_info *devinfo = batch->blorp->compiler->devinfo;
       ps.MaximumNumberofThreadsPerPSD =
          devinfo->max_threads_per_psd - (GFX_VER == 8 ? 2 : 1);
 
index ca9a79794c347405a23cd38d620eb3378835a70d..06d0c4d26ea42e239e2c0ba95e4ccf73325489c1 100644 (file)
@@ -984,6 +984,57 @@ brw_fs_simd_width_for_ksp(unsigned ksp_idx, bool simd8_enabled,
    }
 }
 
+static inline void
+brw_fs_get_dispatch_enables(const struct intel_device_info *devinfo,
+                            const struct brw_wm_prog_data *prog_data,
+                            unsigned rasterization_samples,
+                            bool *enable_8,
+                            bool *enable_16,
+                            bool *enable_32)
+{
+   assert(rasterization_samples != 0); /* anv passes 1 without MS state */
+   /* Start from the widths in prog_data; the rules below only clear them. */
+   *enable_8  = prog_data->dispatch_8;
+   *enable_16 = prog_data->dispatch_16;
+   *enable_32 = prog_data->dispatch_32;
+
+   if (prog_data->persample_dispatch) {
+      /* Starting with SandyBridge (where we first get MSAA), the different
+       * pixel dispatch combinations are grouped into classifications A
+       * through F (SNB PRM Vol. 2 Part 1 Section 7.7.1).  On most hardware
+       * generations, the only configurations supporting persample dispatch
+       * are those in which only one dispatch width is enabled.
+       *
+       * The Gfx12 hardware spec has a similar dispatch grouping table, but
+       * the following conflicting restriction applies (from the page on
+       * "Structure_3DSTATE_PS_BODY"), so we need to keep the SIMD16 shader:
+       *
+       *  "SIMD32 may only be enabled if SIMD16 or (dual)SIMD8 is also
+       *   enabled."
+       */
+      if (*enable_32 || *enable_16)
+         *enable_8 = false;
+      if (devinfo->ver < 12 && *enable_32)
+         *enable_16 = false;
+   }
+
+   /* The docs for 3DSTATE_PS::32 Pixel Dispatch Enable say:
+    *
+    *    "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16,
+    *     SIMD32 Dispatch must not be enabled for PER_PIXEL dispatch
+    *     mode."
+    *
+    * 16x MSAA only exists on Gfx9+, so we can skip this on Gfx8.
+    */
+   if (devinfo->ver >= 9 && rasterization_samples == 16 &&
+       !prog_data->persample_dispatch) {
+      assert(*enable_8 || *enable_16);
+      *enable_32 = false;
+   }
+   /* Every restriction above must leave at least one width enabled. */
+   assert(*enable_8 || *enable_16 || *enable_32);
+}
+
 #define brw_wm_state_simd_width_for_ksp(wm_state, ksp_idx) \
    brw_fs_simd_width_for_ksp((ksp_idx), (wm_state)._8PixelDispatchEnable, \
                              (wm_state)._16PixelDispatchEnable, \
index a339ffb593276d9b025017a7a005b80e9cd70194..2924650c8edc9daf7dc97001194da2296f978f2d 100644 (file)
@@ -7607,26 +7607,6 @@ brw_compile_fs(const struct brw_compiler *compiler,
       }
    }
 
-   if (prog_data->persample_dispatch) {
-      /* Starting with SandyBridge (where we first get MSAA), the different
-       * pixel dispatch combinations are grouped into classifications A
-       * through F (SNB PRM Vol. 2 Part 1 Section 7.7.1).  On most hardware
-       * generations, the only configurations supporting persample dispatch
-       * are those in which only one dispatch width is enabled.
-       *
-       * The Gfx12 hardware spec has a similar dispatch grouping table, but
-       * the following conflicting restriction applies (from the page on
-       * "Structure_3DSTATE_PS_BODY"), so we need to keep the SIMD16 shader:
-       *
-       *  "SIMD32 may only be enabled if SIMD16 or (dual)SIMD8 is also
-       *   enabled."
-       */
-      if (simd32_cfg || simd16_cfg)
-         simd8_cfg = NULL;
-      if (simd32_cfg && devinfo->ver < 12)
-         simd16_cfg = NULL;
-   }
-
    fs_generator g(compiler, params->log_data, mem_ctx, &prog_data->base,
                   v8->runtime_check_aads_emit, MESA_SHADER_FRAGMENT);
 
index bc061ed59168553882d2c04b48296f426754ee4f..7dc18730f3f0387b07c12b0e06bda6867cfe0ff6 100644 (file)
@@ -1488,23 +1488,11 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
    const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
 
    anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
-      ps._8PixelDispatchEnable      = wm_prog_data->dispatch_8;
-      ps._16PixelDispatchEnable     = wm_prog_data->dispatch_16;
-      ps._32PixelDispatchEnable     = wm_prog_data->dispatch_32;
-
-      /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable:
-       *
-       *    "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32
-       *    Dispatch must not be enabled for PER_PIXEL dispatch mode."
-       *
-       * Since 16x MSAA is first introduced on SKL, we don't need to apply
-       * the workaround on any older hardware.
-       */
-      if (!wm_prog_data->persample_dispatch &&
-          ms != NULL && ms->rasterization_samples == 16) {
-         assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable);
-         ps._32PixelDispatchEnable = false;
-      }
+      brw_fs_get_dispatch_enables(devinfo, wm_prog_data,
+                                  ms != NULL ? ms->rasterization_samples : 1,
+                                  &ps._8PixelDispatchEnable,
+                                  &ps._16PixelDispatchEnable,
+                                  &ps._32PixelDispatchEnable);
 
       ps.KernelStartPointer0 = fs_bin->kernel.offset +
                                brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
index f8127b24dc969f83c9d2fc5326e29ed3978c7f15..c936af5fc50bb42ce1730cd05c199942b09faa9c 100644 (file)
@@ -1677,9 +1677,11 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
 #endif
 
    anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
-      ps._8PixelDispatchEnable      = wm_prog_data->dispatch_8;
-      ps._16PixelDispatchEnable     = wm_prog_data->dispatch_16;
-      ps._32PixelDispatchEnable     = wm_prog_data->dispatch_32;
+      brw_fs_get_dispatch_enables(devinfo, wm_prog_data,
+                                  ms != NULL ? ms->rasterization_samples : 1,
+                                  &ps._8PixelDispatchEnable,
+                                  &ps._16PixelDispatchEnable,
+                                  &ps._32PixelDispatchEnable);
 
       ps.KernelStartPointer0 = fs_bin->kernel.offset +
                                brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);