} else {
fs_reg bd_high(VGRF, alloc.allocate(d_regs), BRW_REGISTER_TYPE_UD);
fs_reg bd_low(VGRF, alloc.allocate(d_regs), BRW_REGISTER_TYPE_UD);
- fs_reg acc = retype(brw_acc_reg(inst->exec_size), BRW_REGISTER_TYPE_UD);
+ const unsigned acc_width = reg_unit(devinfo) * 8;
+ fs_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), BRW_REGISTER_TYPE_UD),
+ inst->group % acc_width);
fs_inst *mul = ibld.MUL(acc,
subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0),
/* Should have been lowered to 8-wide. */
assert(inst->exec_size <= get_lowered_simd_width(compiler, inst));
- const fs_reg acc = retype(brw_acc_reg(inst->exec_size), inst->dst.type);
+ const unsigned acc_width = reg_unit(devinfo) * 8;
+ const fs_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), inst->dst.type),
+ inst->group % acc_width);
fs_inst *mul = ibld.MUL(acc, inst->src[0], inst->src[1]);
fs_inst *mach = ibld.MACH(inst->dst, inst->src[0], inst->src[1]);
inst->writes_accumulator_implicitly(devinfo));
const unsigned offset = (inst->group + i) * type_sz(tx) *
(devinfo->ver < 7 || brw_reg_type_is_floating_point(tx) ? 1 : 2);
- return offset / REG_SIZE % 2;
+ return offset / (reg_unit(devinfo) * REG_SIZE) % 2;
}
/**