intel/fs: fix incorrect register flag interaction with dynamic interpolator mode
authorLionel Landwerlin <lionel.g.landwerlin@intel.com>
Tue, 21 Nov 2023 08:38:19 +0000 (10:38 +0200)
committerEric Engestrom <eric@engestrom.ch>
Sun, 3 Dec 2023 07:59:40 +0000 (07:59 +0000)
Once NIR code is lowered and a few optimization passes have run, there
might be flag register interactions between instructions quite far
away from one another.

In the following case :

   f0 = and r0, r1
   ...
   fs_interpolate r2, r3
   ...
   if f0
      ...
   endif

If we lower fs_inteporlate while using the f0 register, we completely
garble the value meant for the if block.

To fix this, emit the predication for fs_interpolate in brw_fs_nir.cpp
when doing the NIR translation to the backend IR. This will guarantee
that the flag register interactions are visible to the optimization
passes, avoiding the problem above.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: 68027bd38e ("intel/fs: implement dynamic interpolation mode for dynamic persample shaders")
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9757
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26306>
(cherry picked from commit 83a1657b6c7f117f1226a955b8d2f1e01b22d322)

.pick_status.json
src/intel/compiler/brw_eu_defines.h
src/intel/compiler/brw_fs_nir.cpp
src/intel/compiler/brw_lower_logical_sends.cpp

index 375420b..c3a3a68 100644 (file)
         "description": "intel/fs: fix incorrect register flag interaction with dynamic interpolator mode",
         "nominated": true,
         "nomination_type": 1,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": "68027bd38e134f45d1fe8612c0c31e5379ed7435",
         "notes": null
index b01c221..0d4b93e 100644 (file)
@@ -986,6 +986,17 @@ enum urb_logical_srcs {
    URB_LOGICAL_NUM_SRCS
 };
 
+enum interpolator_logical_srcs {
+   /** Interpolation offset */
+   INTERP_SRC_OFFSET,
+   /** Message data  */
+   INTERP_SRC_MSG_DESC,
+   /** Flag register for dynamic mode */
+   INTERP_SRC_DYNAMIC_MODE,
+
+   INTERP_NUM_SRCS
+};
+
 
 #ifdef __cplusplus
 /**
index 34df9a8..1ba548b 100644 (file)
@@ -2092,12 +2092,18 @@ emit_pixel_interpolater_send(const fs_builder &bld,
                              const fs_reg &dst,
                              const fs_reg &src,
                              const fs_reg &desc,
+                             const fs_reg &flag_reg,
                              glsl_interp_mode interpolation)
 {
    struct brw_wm_prog_data *wm_prog_data =
       brw_wm_prog_data(bld.shader->stage_prog_data);
 
-   fs_inst *inst = bld.emit(opcode, dst, src, desc);
+   fs_reg srcs[INTERP_NUM_SRCS];
+   srcs[INTERP_SRC_OFFSET]       = src;
+   srcs[INTERP_SRC_MSG_DESC]     = desc;
+   srcs[INTERP_SRC_DYNAMIC_MODE] = flag_reg;
+
+   fs_inst *inst = bld.emit(opcode, dst, srcs, INTERP_NUM_SRCS);
    /* 2 floats per slot returned */
    inst->size_written = 2 * dst.component_size(inst->exec_size);
    if (interpolation == INTERP_MODE_NOPERSPECTIVE) {
@@ -3578,11 +3584,23 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
          bld.exec_all().group(1, 0).SHL(msg_data, sample_id, brw_imm_ud(4u));
       }
 
+      fs_reg flag_reg;
+      struct brw_wm_prog_key *wm_prog_key = (struct brw_wm_prog_key *) key;
+      if (wm_prog_key->multisample_fbo == BRW_SOMETIMES) {
+         struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(prog_data);
+
+         check_dynamic_msaa_flag(bld.exec_all().group(8, 0),
+                                 wm_prog_data,
+                                 BRW_WM_MSAA_FLAG_MULTISAMPLE_FBO);
+         flag_reg = brw_flag_reg(0, 0);
+      }
+
       emit_pixel_interpolater_send(bld,
                                    FS_OPCODE_INTERPOLATE_AT_SAMPLE,
                                    dest,
                                    fs_reg(), /* src */
                                    msg_data,
+                                   flag_reg,
                                    interpolation);
       break;
    }
@@ -3603,6 +3621,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
                                       dest,
                                       fs_reg(), /* src */
                                       brw_imm_ud(off_x | (off_y << 4)),
+                                      fs_reg(), /* flag_reg */
                                       interpolation);
       } else {
          fs_reg src = retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_D);
@@ -3612,6 +3631,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
                                       dest,
                                       src,
                                       brw_imm_ud(0u),
+                                      fs_reg(), /* flag_reg */
                                       interpolation);
       }
       break;
index 1e7d766..40daed3 100644 (file)
@@ -2727,17 +2727,17 @@ lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst,
    unsigned mode;
    switch (inst->opcode) {
    case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
-      assert(inst->src[0].file == BAD_FILE);
+      assert(inst->src[INTERP_SRC_OFFSET].file == BAD_FILE);
       mode = GFX7_PIXEL_INTERPOLATOR_LOC_SAMPLE;
       break;
 
    case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
-      assert(inst->src[0].file == BAD_FILE);
+      assert(inst->src[INTERP_SRC_OFFSET].file == BAD_FILE);
       mode = GFX7_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET;
       break;
 
    case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
-      payload = inst->src[0];
+      payload = inst->src[INTERP_SRC_OFFSET];
       mlen = 2 * inst->exec_size / 8;
       mode = GFX7_PIXEL_INTERPOLATOR_LOC_PER_SLOT_OFFSET;
       break;
@@ -2747,10 +2747,9 @@ lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst,
    }
 
    const bool dynamic_mode =
-      inst->opcode == FS_OPCODE_INTERPOLATE_AT_SAMPLE &&
-      wm_prog_key->multisample_fbo == BRW_SOMETIMES;
+      inst->src[INTERP_SRC_DYNAMIC_MODE].file != BAD_FILE;
 
-   fs_reg desc = inst->src[1];
+   fs_reg desc = inst->src[INTERP_SRC_MSG_DESC];
    uint32_t desc_imm =
       brw_pixel_interp_desc(devinfo,
                             /* Leave the mode at 0 if persample_dispatch is
@@ -2799,8 +2798,10 @@ lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst,
       const fs_builder &ubld = bld.exec_all().group(8, 0);
       desc = ubld.vgrf(BRW_REGISTER_TYPE_UD);
 
-      check_dynamic_msaa_flag(ubld, wm_prog_data,
-                              BRW_WM_MSAA_FLAG_MULTISAMPLE_FBO);
+      /* The predicate should have been built in brw_fs_nir.cpp when emitting
+       * NIR code. This guarantees that we do not have incorrect interactions
+       * with the flag register holding the predication result.
+       */
       if (orig_desc.file == IMM) {
          /* Not using SEL here because we would generate an instruction with 2
           * immediate sources which is not supported by HW.