intel/fs/xe2+: Fixes for increased accumulator register width.
author Francisco Jerez <currojerez@riseup.net>
Thu, 7 Jul 2022 21:43:05 +0000 (14:43 -0700)
committer Jordan Justen <jordan.l.justen@intel.com>
Thu, 21 Sep 2023 00:19:36 +0000 (17:19 -0700)
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25020>

src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_ir_performance.cpp

index 385b4bf..2306786 100644 (file)
@@ -4413,7 +4413,9 @@ fs_visitor::lower_mul_qword_inst(fs_inst *inst, bblock_t *block)
    } else {
       fs_reg bd_high(VGRF, alloc.allocate(d_regs), BRW_REGISTER_TYPE_UD);
       fs_reg bd_low(VGRF, alloc.allocate(d_regs), BRW_REGISTER_TYPE_UD);
-      fs_reg acc = retype(brw_acc_reg(inst->exec_size), BRW_REGISTER_TYPE_UD);
+      const unsigned acc_width = reg_unit(devinfo) * 8;
+      fs_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), BRW_REGISTER_TYPE_UD),
+                             inst->group % acc_width);
 
       fs_inst *mul = ibld.MUL(acc,
                             subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0),
@@ -4469,7 +4471,9 @@ fs_visitor::lower_mulh_inst(fs_inst *inst, bblock_t *block)
 
    /* Should have been lowered to 8-wide. */
    assert(inst->exec_size <= get_lowered_simd_width(compiler, inst));
-   const fs_reg acc = retype(brw_acc_reg(inst->exec_size), inst->dst.type);
+   const unsigned acc_width = reg_unit(devinfo) * 8;
+   const fs_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), inst->dst.type),
+                                inst->group % acc_width);
    fs_inst *mul = ibld.MUL(acc, inst->src[0], inst->src[1]);
    fs_inst *mach = ibld.MACH(inst->dst, inst->src[0], inst->src[1]);
 
index 01a8a1a..00c34c0 100644 (file)
@@ -1345,7 +1345,7 @@ namespace {
              inst->writes_accumulator_implicitly(devinfo));
       const unsigned offset = (inst->group + i) * type_sz(tx) *
          (devinfo->ver < 7 || brw_reg_type_is_floating_point(tx) ? 1 : 2);
-      return offset / REG_SIZE % 2;
+      return offset / (reg_unit(devinfo) * REG_SIZE) % 2;
    }
 
    /**