intel/fs: Honor strided source regions specified by the IR for CLUSTER_BROADCAST.
author Francisco Jerez <currojerez@riseup.net>
Mon, 20 Dec 2021 08:20:38 +0000 (00:20 -0800)
committer Marge Bot <emma+marge@anholt.net>
Tue, 25 Jan 2022 22:40:44 +0000 (22:40 +0000)
This fixes a bug in the CLUSTER_BROADCAST code generation that causes
the original IR region to be ignored.  This will be a problem when we
start lowering 64-bit CLUSTER_BROADCAST instructions at the IR level,
since that lowering will lead to instructions with non-trivial regioning.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14273>

src/intel/compiler/brw_fs_generator.cpp

index 339c8c6..5bea405 100644 (file)
@@ -2469,7 +2469,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
          assert(src[2].type == BRW_REGISTER_TYPE_UD);
          const unsigned component = src[1].ud;
          const unsigned cluster_size = src[2].ud;
-         unsigned vstride = cluster_size;
+         assert(inst->src[0].file != ARF && inst->src[0].file != FIXED_GRF);
+         const unsigned s = inst->src[0].stride;
+         unsigned vstride = cluster_size * s;
          unsigned width = cluster_size;
 
          /* The maximum exec_size is 32, but the maximum width is only 16. */
@@ -2478,7 +2480,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
             width = 1;
          }
 
-         struct brw_reg strided = stride(suboffset(src[0], component),
+         struct brw_reg strided = stride(suboffset(src[0], component * s),
                                          vstride, width, 0);
          if (type_sz(src[0].type) > 4 &&
              (devinfo->platform == INTEL_PLATFORM_CHV ||