has_invalid_src_region(const gen_device_info *devinfo, const fs_inst *inst,
unsigned i)
{
- if (is_unordered(inst) || inst->is_control_source(i)) {
+ if (is_unordered(inst) || inst->is_control_source(i))
return false;
- } else {
- const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type);
- const unsigned src_byte_stride = inst->src[i].stride *
- type_sz(inst->src[i].type);
- const unsigned dst_byte_offset = reg_offset(inst->dst) % REG_SIZE;
- const unsigned src_byte_offset = reg_offset(inst->src[i]) % REG_SIZE;
- return has_dst_aligned_region_restriction(devinfo, inst) &&
- !is_uniform(inst->src[i]) &&
- (src_byte_stride != dst_byte_stride ||
- src_byte_offset != dst_byte_offset);
+ /* Empirical testing shows that Broadwell has a bug affecting half-float
+ * MAD instructions when any of its sources has a non-zero offset, such
+ * as:
+ *
+ * mad(8) g18<1>HF -g17<4,4,1>HF g14.8<4,4,1>HF g11<4,4,1>HF { align16 1Q };
+ *
+ * We used to generate code like this for SIMD8 executions where we
+ * used to pack components Y and W of a vector at offset 16B of a SIMD
+ * register. The problem doesn't occur if the stride of the source is 0.
+ */
+ if (devinfo->gen == 8 &&
+ inst->opcode == BRW_OPCODE_MAD &&
+ inst->src[i].type == BRW_REGISTER_TYPE_HF &&
+ reg_offset(inst->src[i]) % REG_SIZE > 0 &&
+ inst->src[i].stride != 0) {
+ return true;
}
+
+ const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type);
+ const unsigned src_byte_stride = inst->src[i].stride *
+ type_sz(inst->src[i].type);
+ const unsigned dst_byte_offset = reg_offset(inst->dst) % REG_SIZE;
+ const unsigned src_byte_offset = reg_offset(inst->src[i]) % REG_SIZE;
+
+ return has_dst_aligned_region_restriction(devinfo, inst) &&
+ !is_uniform(inst->src[i]) &&
+ (src_byte_stride != dst_byte_stride ||
+ src_byte_offset != dst_byte_offset);
}
/*