intel/compiler: Convert wm_prog_key::persample_interp to a tri-state
author: Jason Ekstrand <jason@jlekstrand.net>
Fri, 19 Nov 2021 22:34:19 +0000 (16:34 -0600)
committer: Marge Bot <emma+marge@anholt.net>
Mon, 6 Feb 2023 09:12:18 +0000 (09:12 +0000)
This allows for the possibility that we may not know at compile time if
sample shading is enabled through the API.  While we're here, also
document exactly what this bit means so we don't confuse ourselves.

v2: Fixup coarse pixel values (Lionel)

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21094>

src/gallium/drivers/crocus/crocus_state.c
src/gallium/drivers/iris/iris_program.c
src/gallium/drivers/iris/iris_state.c
src/intel/compiler/brw_compiler.h
src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_fs_visitor.cpp
src/intel/compiler/brw_nir.c
src/intel/vulkan/anv_pipeline.c
src/intel/vulkan/genX_pipeline.c
src/intel/vulkan_hasvk/anv_pipeline.c
src/intel/vulkan_hasvk/genX_pipeline.c

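diff --git a/src/gallium/drivers/crocus/crocus_state.c b/src/gallium/drivers/crocus/crocus_state.c
index 54b9b8c..711d99d 100644
--- a/src/gallium/drivers/crocus/crocus_state.c
+++ b/src/gallium/drivers/crocus/crocus_state.c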
@@ -4843,8 +4843,9 @@ crocus_populate_fs_key(const struct crocus_context *ice,
    key->flat_shade = rast->cso.flatshade &&
       (info->inputs_read & (VARYING_BIT_COL0 | VARYING_BIT_COL1));
 
-   key->persample_interp = rast->cso.force_persample_interp;
    key->multisample_fbo = rast->cso.multisample && fb->samples > 1;
+   key->persample_interp =
+      rast->cso.force_persample_interp ? BRW_ALWAYS : BRW_NEVER;
 
    key->ignore_sample_mask_out = !key->multisample_fbo;
    key->coherent_fb_fetch = false; // TODO: needed?
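diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c
index 765bb51..3a1c023 100644
--- a/src/gallium/drivers/iris/iris_program.c
+++ b/src/gallium/drivers/iris/iris_program.c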
@@ -159,7 +159,7 @@ iris_to_brw_fs_key(const struct iris_screen *screen,
       .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
       .alpha_to_coverage = key->alpha_to_coverage,
       .clamp_fragment_color = key->clamp_fragment_color,
-      .persample_interp = key->persample_interp,
+      .persample_interp = key->persample_interp ? BRW_ALWAYS : BRW_NEVER,
       .multisample_fbo = key->multisample_fbo,
       .force_dual_color_blend = key->force_dual_color_blend,
       .coherent_fb_fetch = key->coherent_fb_fetch,
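diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 666763f..5c02c0e 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c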
@@ -6523,7 +6523,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
          wm.StatisticsEnable = ice->state.statistics_counters_enabled;
 
          wm.BarycentricInterpolationMode =
-            wm_prog_data->barycentric_interp_modes;
+            wm_prog_data_barycentric_modes(wm_prog_data, 0);
 
          if (wm_prog_data->early_fragment_tests)
             wm.EarlyDepthStencilControl = EDSC_PREPS;
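diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index 8bcbe42..c45e1a8 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h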
@@ -498,7 +498,7 @@ struct brw_wm_prog_key {
     * us to run per-sample.  Even when running per-sample due to gl_SampleID,
     * we may still interpolate unqualified inputs at the pixel center.
     */
-   bool persample_interp:1;
+   enum brw_sometimes persample_interp:2;
 
    bool multisample_fbo:1;
    enum brw_sometimes line_aa:2;
@@ -507,7 +507,7 @@ struct brw_wm_prog_key {
    bool ignore_sample_mask_out:1;
    bool coarse_pixel:1;
 
-   uint64_t padding:58;
+   uint64_t padding:57;
 };
 
 struct brw_cs_prog_key {
@@ -828,6 +828,10 @@ enum brw_barycentric_mode {
    BRW_BARYCENTRIC_NONPERSPECTIVE_SAMPLE   = 5,
    BRW_BARYCENTRIC_MODE_COUNT              = 6
 };
+#define BRW_BARYCENTRIC_PERSPECTIVE_BITS \
+   ((1 << BRW_BARYCENTRIC_PERSPECTIVE_PIXEL) | \
+    (1 << BRW_BARYCENTRIC_PERSPECTIVE_CENTROID) | \
+    (1 << BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE))
 #define BRW_BARYCENTRIC_NONPERSPECTIVE_BITS \
    ((1 << BRW_BARYCENTRIC_NONPERSPECTIVE_PIXEL) | \
     (1 << BRW_BARYCENTRIC_NONPERSPECTIVE_CENTROID) | \
@@ -854,6 +858,9 @@ enum brw_wm_msaa_flags {
    /** True if this shader has been dispatched per-sample */
    BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH = (1 << 2),
 
+   /** True if inputs should be interpolated per-sample by default */
+   BRW_WM_MSAA_FLAG_PERSAMPLE_INTERP = (1 << 3),
+
    /** True if this shader has been dispatched coarse
     *
     * This is intentionally chose to be bit 18 to correspond to the coarse
@@ -1090,6 +1097,56 @@ brw_wm_prog_data_is_persample(const struct brw_wm_prog_data *prog_data,
    return prog_data->persample_dispatch;
 }
 
+static inline uint32_t
+wm_prog_data_barycentric_modes(const struct brw_wm_prog_data *prog_data,
+                               enum brw_wm_msaa_flags pushed_msaa_flags)
+{
+   uint32_t modes = prog_data->barycentric_interp_modes;
+
+   if (pushed_msaa_flags & BRW_WM_MSAA_FLAG_PERSAMPLE_INTERP) {
+      assert(pushed_msaa_flags & BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC);
+
+      assert(prog_data->persample_dispatch == BRW_ALWAYS ||
+             (pushed_msaa_flags & BRW_WM_MSAA_FLAG_PERSAMPLE_DISPATCH));
+
+      /* Making dynamic per-sample interpolation work is a bit tricky.  The
+       * hardware will hang if SAMPLE is requested but per-sample dispatch is
+       * not enabled.  This means we can't preemptively add SAMPLE to the
+       * barycentrics bitfield.  Instead, we have to add it late and only
+       * on-demand.  Annoyingly, changing the number of barycentrics requested
+       * changes the whole PS shader payload so we very much don't want to do
+       * that.  Instead, if the dynamic per-sample interpolation flag is set,
+       * we check to see if SAMPLE was requested and, if not, replace the
+       * highest barycentric bit in the [non]perspective grouping (CENTROID,
+       * if it exists, else PIXEL) with SAMPLE.  The shader will stomp all the
+       * barycentrics in the shader with SAMPLE so it really doesn't matter
+       * which one we replace.  The important thing is that we keep the number
+       * of barycentrics in each [non]perspective grouping the same.
+       */
+      if ((modes & BRW_BARYCENTRIC_PERSPECTIVE_BITS) &&
+          !(modes & BITFIELD_BIT(BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE))) {
+         int sample_mode =
+            util_last_bit(modes & BRW_BARYCENTRIC_PERSPECTIVE_BITS) - 1;
+         assert(modes & BITFIELD_BIT(sample_mode));
+
+         modes &= ~BITFIELD_BIT(sample_mode);
+         modes |= BITFIELD_BIT(BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE);
+      }
+
+      if ((modes & BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) &&
+          !(modes & BITFIELD_BIT(BRW_BARYCENTRIC_NONPERSPECTIVE_SAMPLE))) {
+         int sample_mode =
+            util_last_bit(modes & BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) - 1;
+         assert(modes & BITFIELD_BIT(sample_mode));
+
+         modes &= ~BITFIELD_BIT(sample_mode);
+         modes |= BITFIELD_BIT(BRW_BARYCENTRIC_NONPERSPECTIVE_SAMPLE);
+      }
+   }
+
+   return modes;
+}
+
 static inline bool
 brw_wm_prog_data_is_coarse(const struct brw_wm_prog_data *prog_data,
                            enum brw_wm_msaa_flags pushed_msaa_flags)
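diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 98aab82..85e2156 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp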
@@ -7284,9 +7284,10 @@ brw_nir_populate_wm_prog_data(const nir_shader *shader,
       shader->info.fs.uses_sample_shading ||
       shader->info.outputs_read;
 
-   prog_data->persample_dispatch = BRW_NEVER;
-   if (key->multisample_fbo &&
-       (key->persample_interp || prog_data->sample_shading))
+   assert(key->multisample_fbo || key->persample_interp == BRW_NEVER);
+
+   prog_data->persample_dispatch = key->persample_interp;
+   if (key->multisample_fbo && prog_data->sample_shading)
       prog_data->persample_dispatch = BRW_ALWAYS;
 
    if (devinfo->ver >= 6) {
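diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp
index c9ab1c4..791ef26 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp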
@@ -289,6 +289,7 @@ fs_visitor::emit_interpolation_setup_gfx6()
    this->pixel_x = vgrf(glsl_type::float_type);
    this->pixel_y = vgrf(glsl_type::float_type);
 
+   const struct brw_wm_prog_key *wm_key = (brw_wm_prog_key*) this->key;
    struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(prog_data);
 
    fs_reg int_sample_offset_x, int_sample_offset_y; /* Used on Gen12HP+ */
@@ -394,48 +395,58 @@ fs_visitor::emit_interpolation_setup_gfx6()
    fs_reg half_int_pixel_offset_x, half_int_pixel_offset_y;
    switch (wm_prog_data->coarse_pixel_dispatch) {
    case BRW_NEVER:
-#define COPY_OFFSET_REG(prefix, suffix) \
-  prefix##_pixel_##suffix = prefix##_sample_##suffix;
-
-      COPY_OFFSET_REG(int, offset_x)
-      COPY_OFFSET_REG(int, offset_y)
-      COPY_OFFSET_REG(int, offset_xy)
-      COPY_OFFSET_REG(half_int, offset_x)
-      COPY_OFFSET_REG(half_int, offset_y)
-
-#undef COPY_OFFSET_REG
+      int_pixel_offset_x = int_sample_offset_x;
+      int_pixel_offset_y = int_sample_offset_y;
+      int_pixel_offset_xy = int_sample_offset_xy;
+      half_int_pixel_offset_x = half_int_sample_offset_x;
+      half_int_pixel_offset_y = half_int_sample_offset_y;
       break;
 
-   case BRW_SOMETIMES:
-      check_dynamic_msaa_flag(bld, wm_prog_data,
-                              BRW_WM_MSAA_FLAG_COARSE_DISPATCH);
-
-#define COPY_OFFSET_REG(prefix, suffix) \
-   prefix##_pixel_##suffix = bld.vgrf(BRW_REGISTER_TYPE_UW); \
-   bld.SEL(prefix##_pixel_##suffix, \
-           prefix##_coarse_##suffix, \
-           prefix##_pixel_##suffix); \
+   case BRW_SOMETIMES: {
+      const fs_builder dbld =
+         abld.exec_all().group(MIN2(16, dispatch_width) * 2, 0);
 
-      COPY_OFFSET_REG(int, offset_x)
-      COPY_OFFSET_REG(int, offset_y)
-      COPY_OFFSET_REG(int, offset_xy)
-      COPY_OFFSET_REG(half_int, offset_x)
-      COPY_OFFSET_REG(half_int, offset_y)
+      check_dynamic_msaa_flag(dbld, wm_prog_data,
+                              BRW_WM_MSAA_FLAG_COARSE_DISPATCH);
 
-#undef COPY_OFFSET_REG
+      int_pixel_offset_x = dbld.vgrf(BRW_REGISTER_TYPE_UW);
+      set_predicate(BRW_PREDICATE_NORMAL,
+                    dbld.SEL(int_pixel_offset_x,
+                             int_coarse_offset_x,
+                             int_sample_offset_x));
+
+      int_pixel_offset_y = dbld.vgrf(BRW_REGISTER_TYPE_UW);
+      set_predicate(BRW_PREDICATE_NORMAL,
+                    dbld.SEL(int_pixel_offset_y,
+                             int_coarse_offset_y,
+                             int_sample_offset_y));
+
+      int_pixel_offset_xy = dbld.vgrf(BRW_REGISTER_TYPE_UW);
+      set_predicate(BRW_PREDICATE_NORMAL,
+                    dbld.SEL(int_pixel_offset_xy,
+                             int_coarse_offset_xy,
+                             int_sample_offset_xy));
+
+      half_int_pixel_offset_x = bld.vgrf(BRW_REGISTER_TYPE_UW);
+      set_predicate(BRW_PREDICATE_NORMAL,
+                    bld.SEL(half_int_pixel_offset_x,
+                            half_int_coarse_offset_x,
+                            half_int_sample_offset_x));
+
+      half_int_pixel_offset_y = bld.vgrf(BRW_REGISTER_TYPE_UW);
+      set_predicate(BRW_PREDICATE_NORMAL,
+                    bld.SEL(half_int_pixel_offset_y,
+                            half_int_coarse_offset_y,
+                            half_int_sample_offset_y));
       break;
+   }
 
    case BRW_ALWAYS:
-#define COPY_OFFSET_REG(prefix, suffix) \
-  prefix##_pixel_##suffix = prefix##_coarse_##suffix;
-
-      COPY_OFFSET_REG(int, offset_x)
-      COPY_OFFSET_REG(int, offset_y)
-      COPY_OFFSET_REG(int, offset_xy)
-      COPY_OFFSET_REG(half_int, offset_x)
-      COPY_OFFSET_REG(half_int, offset_y)
-
-#undef COPY_OFFSET_REG
+      int_pixel_offset_x = int_coarse_offset_x;
+      int_pixel_offset_y = int_coarse_offset_y;
+      int_pixel_offset_xy = int_coarse_offset_xy;
+      half_int_pixel_offset_x = half_int_coarse_offset_x;
+      half_int_pixel_offset_y = half_int_coarse_offset_y;
       break;
    }
 
@@ -605,6 +616,55 @@ fs_visitor::emit_interpolation_setup_gfx6()
       abld.emit(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
    }
 
+   if (wm_key->persample_interp == BRW_SOMETIMES) {
+      assert(!devinfo->needs_unlit_centroid_workaround);
+
+      const fs_builder ubld = bld.exec_all().group(16, 0);
+      bool loaded_flag = false;
+
+      for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
+         if (!(wm_prog_data->barycentric_interp_modes & BITFIELD_BIT(i)))
+            continue;
+
+         /* The sample mode will always be the top bit set in the perspective
+          * or non-perspective section.  In the case where no SAMPLE mode was
+          * requested, wm_prog_data_barycentric_modes() will swap out the top
+          * mode for SAMPLE so this works regardless of whether SAMPLE was
+          * requested or not.
+          */
+         int sample_mode;
+         if (BITFIELD_BIT(i) & BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) {
+            sample_mode = util_last_bit(wm_prog_data->barycentric_interp_modes &
+                                        BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) - 1;
+         } else {
+            sample_mode = util_last_bit(wm_prog_data->barycentric_interp_modes &
+                                        BRW_BARYCENTRIC_PERSPECTIVE_BITS) - 1;
+         }
+         assert(wm_prog_data->barycentric_interp_modes &
+                BITFIELD_BIT(sample_mode));
+
+         if (i == sample_mode)
+            continue;
+
+         uint8_t *barys = fs_payload().barycentric_coord_reg[i];
+
+         uint8_t *sample_barys = fs_payload().barycentric_coord_reg[sample_mode];
+         assert(barys[0] && sample_barys[0]);
+
+         if (!loaded_flag) {
+            check_dynamic_msaa_flag(ubld, wm_prog_data,
+                                    BRW_WM_MSAA_FLAG_PERSAMPLE_INTERP);
+         }
+
+         for (unsigned j = 0; j < dispatch_width / 8; j++) {
+            fs_inst *mov =
+               ubld.MOV(brw_vec8_grf(barys[j / 2] + (j % 2) * 2, 0),
+                        brw_vec8_grf(sample_barys[j / 2] + (j % 2) * 2, 0));
+            mov->predicate = BRW_PREDICATE_NORMAL;
+         }
+      }
+   }
+
    for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
       this->delta_xy[i] = fetch_barycentric_reg(
          bld, fs_payload().barycentric_coord_reg[i]);
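diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index a0da8ba..cd6994c 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c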
@@ -555,7 +555,7 @@ brw_nir_lower_fs_inputs(nir_shader *nir,
 
    if (!key->multisample_fbo) {
       nir_lower_single_sampled(nir);
-   } else if (key->persample_interp) {
+   } else if (key->persample_interp == BRW_ALWAYS) {
       nir_shader_instructions_pass(nir, lower_barycentric_per_sample,
                                    nir_metadata_block_index |
                                    nir_metadata_dominance,
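diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 0813252..11e952b 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c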
@@ -556,8 +556,10 @@ populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
        * harmless to compute it and then let dead-code take care of it.
        */
       if (ms->rasterization_samples > 1) {
-         key->persample_interp = ms->sample_shading_enable &&
-            (ms->min_sample_shading * ms->rasterization_samples) > 1;
+         key->persample_interp =
+            (ms->sample_shading_enable &&
+             (ms->min_sample_shading * ms->rasterization_samples) > 1) ?
+            BRW_ALWAYS : BRW_NEVER;
          key->multisample_fbo = true;
       }
 
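diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 983d4f5..7b8eae3 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c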
@@ -1500,7 +1500,7 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
          wm_prog_data->uses_kill;
 
       wm.BarycentricInterpolationMode =
-         wm_prog_data->barycentric_interp_modes;
+         wm_prog_data_barycentric_modes(wm_prog_data, 0);
    }
 
    GENX(3DSTATE_WM_pack)(NULL, pipeline->gfx8.wm, &wm);
@@ -1615,7 +1615,8 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
 #if GFX_VER >= 11
       ps.PixelShaderRequiresSourceDepthandorWPlaneCoefficients =
          wm_prog_data->uses_depth_w_coefficients;
-      ps.PixelShaderIsPerCoarsePixel = wm_prog_data->coarse_pixel_dispatch;
+      ps.PixelShaderIsPerCoarsePixel =
+         brw_wm_prog_data_is_coarse(wm_prog_data, 0);
 #endif
 #if GFX_VERx10 >= 125
       /* TODO: We should only require this when the last geometry shader uses
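diff --git a/src/intel/vulkan_hasvk/anv_pipeline.c b/src/intel/vulkan_hasvk/anv_pipeline.c
index c756441..4318f9d 100644
--- a/src/intel/vulkan_hasvk/anv_pipeline.c
+++ b/src/intel/vulkan_hasvk/anv_pipeline.c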
@@ -373,8 +373,10 @@ populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
        * harmless to compute it and then let dead-code take care of it.
        */
       if (ms->rasterization_samples > 1) {
-         key->persample_interp = ms->sample_shading_enable &&
-            (ms->min_sample_shading * ms->rasterization_samples) > 1;
+         key->persample_interp =
+            (ms->sample_shading_enable &&
+             (ms->min_sample_shading * ms->rasterization_samples) > 1) ?
+            BRW_ALWAYS : BRW_NEVER;
          key->multisample_fbo = true;
       }
 
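diff --git a/src/intel/vulkan_hasvk/genX_pipeline.c b/src/intel/vulkan_hasvk/genX_pipeline.c
index a22df5b..39da75d 100644
--- a/src/intel/vulkan_hasvk/genX_pipeline.c
+++ b/src/intel/vulkan_hasvk/genX_pipeline.c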
@@ -1580,7 +1580,7 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
 #endif
 
       wm.BarycentricInterpolationMode =
-         wm_prog_data->barycentric_interp_modes;
+         wm_prog_data_barycentric_modes(wm_prog_data, 0);
 
 #if GFX_VER < 8
       wm.PixelShaderComputedDepthMode  = wm_prog_data->computed_depth_mode;